From e4805fea58e352a1dbb85f337ef7e7ab5a436703 Mon Sep 17 00:00:00 2001 From: Dmytro Bavykin <6aBblKuH@gmail.com> Date: Sat, 20 Sep 2025 21:06:44 +0300 Subject: [PATCH] ruby RAG example with a console chat app added --- .../ruby_rag_console_chat_app/.ruby-gemset | 1 + .../ruby_rag_console_chat_app/Gemfile | 9 ++ .../ruby_rag_console_chat_app/Gemfile.lock | 86 ++++++++++++ .../ruby_rag_console_chat_app/README.md | 90 ++++++++++++ .../ruby_rag_console_chat_app/bin/console | 6 + .../ruby_rag_console_chat_app/chromadb/.keep | 0 .../docker-compose.yml | 12 ++ .../ruby_rag_console_chat_app/initializer.rb | 12 ++ .../knowledge_base/company/about.md | 15 ++ .../knowledge_base/company/careers.md | 22 +++ .../knowledge_base/company/overview.md | 37 +++++ .../knowledge_base/contracts/ios_project.md | 34 +++++ .../knowledge_base/contracts/ml_project.md | 35 +++++ .../knowledge_base/contracts/web_project.md | 35 +++++ .../knowledge_base/employees/alex_johnson.md | 54 ++++++++ .../knowledge_base/employees/emma_clark.md | 36 +++++ .../knowledge_base/employees/li_wang.md | 54 ++++++++ .../knowledge_base/employees/oliver_smith.md | 54 ++++++++ .../employees/sophia_martinez.md | 54 ++++++++ .../knowledge_base/products/ios_app_dev.md | 29 ++++ .../knowledge_base/products/ml_solutions.md | 36 +++++ .../products/web_dev_services.md | 37 +++++ .../ruby_rag_console_chat_app/main.rb | 68 +++++++++ .../ruby_rag_console_chat_app/seed.rb | 129 ++++++++++++++++++ 24 files changed, 945 insertions(+) create mode 100644 week5/community-contributions/ruby_rag_console_chat_app/.ruby-gemset create mode 100644 week5/community-contributions/ruby_rag_console_chat_app/Gemfile create mode 100644 week5/community-contributions/ruby_rag_console_chat_app/Gemfile.lock create mode 100644 week5/community-contributions/ruby_rag_console_chat_app/README.md create mode 100755 week5/community-contributions/ruby_rag_console_chat_app/bin/console create mode 100644 
week5/community-contributions/ruby_rag_console_chat_app/chromadb/.keep create mode 100644 week5/community-contributions/ruby_rag_console_chat_app/docker-compose.yml create mode 100644 week5/community-contributions/ruby_rag_console_chat_app/initializer.rb create mode 100644 week5/community-contributions/ruby_rag_console_chat_app/knowledge_base/company/about.md create mode 100644 week5/community-contributions/ruby_rag_console_chat_app/knowledge_base/company/careers.md create mode 100644 week5/community-contributions/ruby_rag_console_chat_app/knowledge_base/company/overview.md create mode 100644 week5/community-contributions/ruby_rag_console_chat_app/knowledge_base/contracts/ios_project.md create mode 100644 week5/community-contributions/ruby_rag_console_chat_app/knowledge_base/contracts/ml_project.md create mode 100644 week5/community-contributions/ruby_rag_console_chat_app/knowledge_base/contracts/web_project.md create mode 100644 week5/community-contributions/ruby_rag_console_chat_app/knowledge_base/employees/alex_johnson.md create mode 100644 week5/community-contributions/ruby_rag_console_chat_app/knowledge_base/employees/emma_clark.md create mode 100644 week5/community-contributions/ruby_rag_console_chat_app/knowledge_base/employees/li_wang.md create mode 100644 week5/community-contributions/ruby_rag_console_chat_app/knowledge_base/employees/oliver_smith.md create mode 100644 week5/community-contributions/ruby_rag_console_chat_app/knowledge_base/employees/sophia_martinez.md create mode 100644 week5/community-contributions/ruby_rag_console_chat_app/knowledge_base/products/ios_app_dev.md create mode 100644 week5/community-contributions/ruby_rag_console_chat_app/knowledge_base/products/ml_solutions.md create mode 100644 week5/community-contributions/ruby_rag_console_chat_app/knowledge_base/products/web_dev_services.md create mode 100644 week5/community-contributions/ruby_rag_console_chat_app/main.rb create mode 100644 
week5/community-contributions/ruby_rag_console_chat_app/seed.rb diff --git a/week5/community-contributions/ruby_rag_console_chat_app/.ruby-gemset b/week5/community-contributions/ruby_rag_console_chat_app/.ruby-gemset new file mode 100644 index 0000000..051ffe6 --- /dev/null +++ b/week5/community-contributions/ruby_rag_console_chat_app/.ruby-gemset @@ -0,0 +1 @@ +ruby_rag_test \ No newline at end of file diff --git a/week5/community-contributions/ruby_rag_console_chat_app/Gemfile b/week5/community-contributions/ruby_rag_console_chat_app/Gemfile new file mode 100644 index 0000000..30b9ff9 --- /dev/null +++ b/week5/community-contributions/ruby_rag_console_chat_app/Gemfile @@ -0,0 +1,9 @@ +# frozen_string_literal: true + +source "https://rubygems.org" + +gem 'ruby-openai' +gem 'chroma-db', github: 'koic/chroma', branch: 'support_chroma_v2_api' # Using fork because original one comes with outdated API +gem 'pry-byebug' +gem 'ruby-next' # to auto-transpile the required files from source on load. 
Otherwise the code from chroma-db may not work correctly +gem 'pragmatic_segmenter' \ No newline at end of file diff --git a/week5/community-contributions/ruby_rag_console_chat_app/Gemfile.lock b/week5/community-contributions/ruby_rag_console_chat_app/Gemfile.lock new file mode 100644 index 0000000..8cd5f68 --- /dev/null +++ b/week5/community-contributions/ruby_rag_console_chat_app/Gemfile.lock @@ -0,0 +1,86 @@ +GIT + remote: https://github.com/koic/chroma.git + revision: c457cb0541dffeb4386a8d4d47eaaa68608d2561 + branch: support_chroma_v2_api + specs: + chroma-db (0.8.2) + dry-monads (~> 1.6) + ruby-next (~> 1.0, >= 1.0.3) + zeitwerk (~> 2.6.0) + +GEM + remote: https://rubygems.org/ + specs: + ast (2.4.3) + byebug (12.0.0) + coderay (1.1.3) + concurrent-ruby (1.3.5) + diff-lcs (1.6.2) + dry-core (1.1.0) + concurrent-ruby (~> 1.0) + logger + zeitwerk (~> 2.6) + dry-monads (1.9.0) + concurrent-ruby (~> 1.0) + dry-core (~> 1.1) + zeitwerk (~> 2.6) + event_stream_parser (1.0.0) + faraday (2.13.4) + faraday-net_http (>= 2.0, < 3.5) + json + logger + faraday-multipart (1.1.1) + multipart-post (~> 2.0) + faraday-net_http (3.4.1) + net-http (>= 0.5.0) + json (2.13.2) + logger (1.7.0) + method_source (1.1.0) + multipart-post (2.4.1) + net-http (0.6.0) + uri + paco (0.2.3) + parser (3.3.9.0) + ast (~> 2.4.1) + racc + pragmatic_segmenter (0.3.24) + pry (0.15.2) + coderay (~> 1.1) + method_source (~> 1.0) + pry-byebug (3.11.0) + byebug (~> 12.0) + pry (>= 0.13, < 0.16) + racc (1.8.1) + require-hooks (0.2.2) + ruby-next (1.1.2) + paco (~> 0.2) + require-hooks (~> 0.2) + ruby-next-core (= 1.1.2) + ruby-next-parser (>= 3.4.0.2) + unparser (~> 0.6.0) + ruby-next-core (1.1.2) + ruby-next-parser (3.4.0.2) + parser (>= 3.0.3.1) + ruby-openai (8.1.0) + event_stream_parser (>= 0.3.0, < 2.0.0) + faraday (>= 1) + faraday-multipart (>= 1) + unparser (0.6.15) + diff-lcs (~> 1.3) + parser (>= 3.3.0) + uri (1.0.3) + zeitwerk (2.6.18) + +PLATFORMS + arm64-darwin-23 + ruby + +DEPENDENCIES + 
chroma-db! + pragmatic_segmenter + pry-byebug + ruby-next + ruby-openai + +BUNDLED WITH + 2.6.8 diff --git a/week5/community-contributions/ruby_rag_console_chat_app/README.md b/week5/community-contributions/ruby_rag_console_chat_app/README.md new file mode 100644 index 0000000..ca11a57 --- /dev/null +++ b/week5/community-contributions/ruby_rag_console_chat_app/README.md @@ -0,0 +1,90 @@ +# Local RAG Knowledge Base with Llama 3.2, Chroma, and Ruby +![Diagram](./doc/ruby_rag_diagram.png) + +## Project Overview + +This mini project demonstrates building a local Retrieval-Augmented Generation (RAG) system using the Llama 3.2 language model (via Ollama locally), Chroma vector database, and Ruby. The project includes: + +- Reading and processing a knowledge base consisting of markdown files organized in folders. +- Splitting documents into context-preserving chunks using the `pragmatic_segmenter` gem for sentence-aware chunking. +- Generating semantic embeddings of chunks with Llama 3.2 local model via an OpenAI-compatible API (Ollama). +- Storing embeddings along with metadata and documents into a Chroma vector database collection. +- Performing similarity search in Chroma on user queries to retrieve relevant context. +- Constructing a conversational prompt by combining chat history, retrieved context, and user input. +- Streaming responses from the Llama 3.2 model back to the console for real-time interaction. + +--- + +## What Has Been Done + +- **Folder crawler and document loader:** + Recursively read all markdown files in the `knowledge_base` directory, assigning document type metadata from folder names. + +- **Smart text chunking:** + Integrated the [`pragmatic_segmenter`](https://github.com/diasks2/pragmatic_segmenter) gem to split texts into sentence-safe chunks (~1000 characters) with overlaps to preserve context and avoid cutting sentences unnaturally. 
+ +- **Embeddings generation:** + Leveraged the local Llama 3.2 model via Ollama's OpenAI-compatible streaming API to generate embeddings of all text chunks, enabling efficient semantic search. + +- **Chroma vector store integration:** + Used the `chroma-db` Ruby gem with a locally hosted Chroma server (via Docker Compose) to store embeddings and metadata, and to perform similarity search queries. + +- **Interactive conversational loop:** + Maintained chat history manually as an array of message hashes, combined with relevant retrieved chunks to form prompts fed into the Llama 3.2 model. + +- **Streaming chat responses:** + Implemented real-time streaming of LLM output to the console leveraging the Ruby OpenAI gem streaming feature and Ollama's compatible API. + +--- + +## Tricky Findings & Gotchas + +- **Ruby Next transpilation required for `chroma-db` gem:** + The gem uses modern Ruby features and requires `gem 'ruby-next'` with `require "ruby-next/language/runtime"` loaded early to avoid LoadErrors. + +- **Chroma API version compatibility:** + Different Chroma server versions expose different API versions (`v1` vs `v2`). The `chroma-db` Ruby gem expected v2 endpoints. Using matched versions of Chroma server and the gem, or a forked gem branch with v2 support, was crucial. + +- **Bundler context for scripts:** + Running scripts must be done with `bundle exec` or with `require 'bundler/setup'` to load local gem dependencies correctly (especially forked gems). + +- **Manual management of conversational memory:** + Unlike Python LangChain, no high-level Ruby library exists for conversation memory or RAG chains, so that had to be implemented as arrays of messages, and prompt assembly was manual. + +- **Text chunking with `pragmatic_segmenter`:** + Using sentence segmentation improved context retention significantly over naïve character splitting, but required careful assembly of chunks and overlaps. 
+ +- **Streaming outputs handled via custom block in Ruby OpenAI gem:** + Streaming integration required capturing delta chunks from the streaming API and printing them in real time, instead of waiting for the full response. + +--- + +## Setup Instructions + +### Requirements + +- Ruby 3.2.x +- Bundler +- Docker & Docker Compose +- Ollama installed and running locally with the `llama3.2` model pulled +- Basic terminal shell (macOS, Linux recommended) + +### Steps + +1. **Clone/Fork the repository:** +2. Run `bundle install` to install Ruby dependencies +3. Run `docker compose up -d` to boot up Chroma DB +4. Run `ollama run llama3.2` to boot up the open-source LLM +5. Run `bundle exec ruby seed.rb` to seed Chroma DB with chunks of data from the `knowledge_base` folder +6. Run `bundle exec ruby main.rb` to start the actual conversation + +### Questions to try +1. What is the company name? +2. When was the company established? +3. Which technologies does the company use? +4. Tell me the employees' names and their titles. +5. Who knows how to work with iOS? +6. Tell me who was the client for the web project? +7. Is the company looking for an iOS developer? + +Then you can compare it with the actual knowledge base. 
\ No newline at end of file diff --git a/week5/community-contributions/ruby_rag_console_chat_app/bin/console b/week5/community-contributions/ruby_rag_console_chat_app/bin/console new file mode 100755 index 0000000..47a7a6e --- /dev/null +++ b/week5/community-contributions/ruby_rag_console_chat_app/bin/console @@ -0,0 +1,6 @@ +#!/usr/bin/env ruby +require 'irb' +require 'irb/completion' +require_relative '../initializer' + +IRB.start \ No newline at end of file diff --git a/week5/community-contributions/ruby_rag_console_chat_app/chromadb/.keep b/week5/community-contributions/ruby_rag_console_chat_app/chromadb/.keep new file mode 100644 index 0000000..e69de29 diff --git a/week5/community-contributions/ruby_rag_console_chat_app/docker-compose.yml b/week5/community-contributions/ruby_rag_console_chat_app/docker-compose.yml new file mode 100644 index 0000000..1ad44f4 --- /dev/null +++ b/week5/community-contributions/ruby_rag_console_chat_app/docker-compose.yml @@ -0,0 +1,12 @@ +services: + chromadb: + image: chromadb/chroma:latest + ports: + - "8000:8000" + environment: + - IS_PERSISTENT=TRUE + - PERSIST_DIRECTORY=/chroma/chroma + - ANONYMIZED_TELEMETRY=TRUE + volumes: + - ./chromadb:/chroma/chroma + restart: unless-stopped diff --git a/week5/community-contributions/ruby_rag_console_chat_app/initializer.rb b/week5/community-contributions/ruby_rag_console_chat_app/initializer.rb new file mode 100644 index 0000000..9e9f7e2 --- /dev/null +++ b/week5/community-contributions/ruby_rag_console_chat_app/initializer.rb @@ -0,0 +1,12 @@ +require 'pathname' +require 'pry' +require 'openai' +require "ruby-next/language/runtime" +require 'chroma-db' +require 'logger' +require 'json' + +Chroma.connect_host = "http://localhost:8000" +Chroma.api_version = "v2" +Chroma.logger = Logger.new($stdout) +Chroma.log_level = Chroma::LEVEL_ERROR \ No newline at end of file diff --git a/week5/community-contributions/ruby_rag_console_chat_app/knowledge_base/company/about.md 
b/week5/community-contributions/ruby_rag_console_chat_app/knowledge_base/company/about.md new file mode 100644 index 0000000..8f13771 --- /dev/null +++ b/week5/community-contributions/ruby_rag_console_chat_app/knowledge_base/company/about.md @@ -0,0 +1,15 @@ +# About TechNova Solutions + +TechNova Solutions is a technology services company specializing in **web development**, **iOS mobile app development**, and **machine learning solutions**. Founded in 2015, we have grown into a trusted partner for startups, medium-sized businesses, and enterprises worldwide. + +## Mission +To empower businesses with cutting-edge software solutions that combine usability, scalability, and intelligence. + +## Vision +To be a global leader in delivering high-quality digital products that transform industries. + +## Values +- Innovation: Continuously adopting new technologies. +- Collaboration: Building strong partnerships with clients. +- Excellence: Maintaining high standards in every project. +- Growth: Creating opportunities for both employees and clients. diff --git a/week5/community-contributions/ruby_rag_console_chat_app/knowledge_base/company/careers.md b/week5/community-contributions/ruby_rag_console_chat_app/knowledge_base/company/careers.md new file mode 100644 index 0000000..0343958 --- /dev/null +++ b/week5/community-contributions/ruby_rag_console_chat_app/knowledge_base/company/careers.md @@ -0,0 +1,22 @@ +# Careers at TechNova Solutions + +We are always looking for passionate and skilled professionals to join our growing team. Our company culture emphasizes **learning**, **innovation**, and **collaboration**. + +## Current Openings +- **iOS Developer** + - Skills: Swift, SwiftUI, UIKit, experience with App Store deployment. +- **Web Developer** + - Skills: React.js, Node.js, TypeScript, REST API integration. +- **Machine Learning Engineer** + - Skills: Python, TensorFlow/PyTorch, data pipelines, NLP, computer vision. 
+- **Project Manager** + - Skills: Agile methodologies, client communication, technical understanding. + +## Why Work With Us? +- Remote-friendly with flexible working hours. +- Opportunities to work on international projects. +- Professional development budget for training and conferences. +- Inclusive and diverse work culture. + +## How to Apply +Send your CV and portfolio to **careers@technova.com** diff --git a/week5/community-contributions/ruby_rag_console_chat_app/knowledge_base/company/overview.md b/week5/community-contributions/ruby_rag_console_chat_app/knowledge_base/company/overview.md new file mode 100644 index 0000000..f18806e --- /dev/null +++ b/week5/community-contributions/ruby_rag_console_chat_app/knowledge_base/company/overview.md @@ -0,0 +1,37 @@ +# Company Overview: TechNova Solutions + +## Industry +Software Development and Consulting + +## Headquarters +Berlin, Germany (with remote teams worldwide) + +## Services +TechNova Solutions provides: +1. **Web Development** + - Full-stack web solutions + - Modern scalable architectures + - E-commerce and SaaS platforms + +2. **iOS App Development** + - Custom mobile applications + - Swift/SwiftUI-based native solutions + - App Store deployment and maintenance + +3. **Machine Learning Development** + - Predictive analytics + - Natural Language Processing (NLP) models + - Computer vision and automation solutions + +## Client Base +- Startups launching MVPs +- Growing SaaS companies +- Enterprises requiring digital transformation + +## Key Achievements +- Delivered over **150 digital projects** across industries. +- Worked with clients in **FinTech, Healthcare, and E-commerce**. +- Recognized as a **Top B2B Tech Firm in 2023** by Clutch. + +## Culture +Our team thrives on **technical challenges** and believes in **continuous improvement**. We foster a work environment where creativity meets execution. 
diff --git a/week5/community-contributions/ruby_rag_console_chat_app/knowledge_base/contracts/ios_project.md b/week5/community-contributions/ruby_rag_console_chat_app/knowledge_base/contracts/ios_project.md new file mode 100644 index 0000000..b5072e5 --- /dev/null +++ b/week5/community-contributions/ruby_rag_console_chat_app/knowledge_base/contracts/ios_project.md @@ -0,0 +1,34 @@ +# iOS App Development Agreement + +**Parties:** +Client: Acme Innovations LLC +Developer: TechNova Solutions + +## Project Scope +TechNova Solutions will design, develop, and deploy a custom iOS mobile application for Acme Innovations, including user authentication, dashboard, push notifications, and App Store submission. + +## Deliverables +- iOS app (Swift/SwiftUI) for iPhone +- Admin dashboard (web-based) +- User and technical documentation +- 2 post-launch maintenance updates + +## Timeline +- Project kickoff: October 1, 2025 +- Beta delivery: December 1, 2025 +- Final delivery: January 15, 2026 + +## Payment Terms +- 30% upfront +- 40% upon beta delivery +- 30% upon final delivery + +## Intellectual Property +All source code and produced assets are transferred to the client upon final payment. + +## Confidentiality +Both parties will maintain strict confidentiality regarding proprietary information. 
+ +## Signatures +_Client Rep:_ _____________________ +_TechNova Rep:_ ___________________ diff --git a/week5/community-contributions/ruby_rag_console_chat_app/knowledge_base/contracts/ml_project.md b/week5/community-contributions/ruby_rag_console_chat_app/knowledge_base/contracts/ml_project.md new file mode 100644 index 0000000..c8a11ae --- /dev/null +++ b/week5/community-contributions/ruby_rag_console_chat_app/knowledge_base/contracts/ml_project.md @@ -0,0 +1,35 @@ +# Machine Learning Project Agreement + +**Parties:** +Client: HealthPlus Analytics +Developer: TechNova Solutions + +## Objective +TechNova Solutions will develop a predictive analytics solution using machine learning models for healthcare data, aimed at reducing patient readmission rates. + +## Statement of Work +- Data preprocessing and anonymization +- Model development (Python, TensorFlow, scikit-learn) +- Evaluation and performance tuning +- Deployment and staff training +- Integration with client’s data system + +## Timeline +- Data delivery: November 1, 2025 +- Prototype model: January 10, 2026 +- Final delivery: February 20, 2026 + +## Payment Schedule +- 25% upon signing +- 50% upon prototype +- 25% upon final acceptance + +## Confidentiality & Data Privacy +Strict data privacy and NDA agreement. No data shared outside of project scope. + +## Intellectual Property +Custom ML models and documentation are transferred to the client at project completion. 
+ +## Signatures +_Client Rep:_ _____________________ +_TechNova Rep:_ ___________________ diff --git a/week5/community-contributions/ruby_rag_console_chat_app/knowledge_base/contracts/web_project.md b/week5/community-contributions/ruby_rag_console_chat_app/knowledge_base/contracts/web_project.md new file mode 100644 index 0000000..3c74095 --- /dev/null +++ b/week5/community-contributions/ruby_rag_console_chat_app/knowledge_base/contracts/web_project.md @@ -0,0 +1,35 @@ +# Web Development Services Agreement + +**Parties:** +Client: BlueSky E-Commerce +Developer: TechNova Solutions + +## Project Description +TechNova Solutions will build a responsive e-commerce web platform with integrated payment gateway, product catalog, and analytics dashboard for BlueSky E-Commerce. + +## Scope of Work +- Modern UX/UI design +- Frontend (React.js), backend (Node.js, PostgreSQL) +- Payment integration (Stripe/PayPal) +- Mobile-responsive design +- 3 months maintenance & support + +## Project Phases +1. Specification & wireframing +2. Development & staging +3. Testing & deployment +4. Support period + +## Pricing & Payment +- Total: €25,000 +- 40% upon contract start, 40% after development milestone, 20% after deployment + +## Intellectual Property +All code, documentation, and design assets become property of the client post-final payment. + +## Dispute Resolution +Any disputes to be resolved via arbitration in Berlin, Germany. 
+ +## Signatures +_Client Rep:_ _____________________ +_TechNova Rep:_ ___________________ diff --git a/week5/community-contributions/ruby_rag_console_chat_app/knowledge_base/employees/alex_johnson.md b/week5/community-contributions/ruby_rag_console_chat_app/knowledge_base/employees/alex_johnson.md new file mode 100644 index 0000000..41c92f2 --- /dev/null +++ b/week5/community-contributions/ruby_rag_console_chat_app/knowledge_base/employees/alex_johnson.md @@ -0,0 +1,54 @@ +# HR Record + +# Alex Johnson + +## Summary +- **Date of Birth**: May 10, 1990 +- **Job Title**: iOS Engineer +- **Location**: Berlin, Germany + +## Career Progression +- **March 2015**: Joined TechNova Solutions as Junior iOS Developer, focused on building and refining mobile features. +- **July 2017**: Promoted to iOS Engineer, led SwiftUI adoption in new projects. +- **November 2019**: Spearheaded the launch of the flagship iOS app with 100k+ downloads. +- **August 2023**: Mentors junior iOS developers and contributes to open-source Swift libraries. + +## Annual Performance History +- **2021**: + - **Performance Rating**: 4.6/5 + - **Key Achievements**: Implemented a new push notification system improving user retention by 15%. + +- **2022**: + - **Performance Rating**: 4.8/5 + - **Key Achievements**: Led successful iOS 16 app updates supporting new platform features. + +- **2023**: + - **Performance Rating**: 4.7/5 + - **Key Achievements**: Reduced app crash rate by 30% with detailed runtime diagnostics tools. + +## Compensation History +- **2021**: + - **Base Salary**: €65,000 + - **Bonus**: €7,000 + +- **2022**: + - **Base Salary**: €70,000 + - **Bonus**: €8,000 + +- **2023**: + - **Base Salary**: €75,000 + - **Bonus**: €10,000 + +## Other HR Notes +- **Training Completed**: + - Advanced SwiftUI Workshop (2022) + - iOS Performance Optimization Course (2023) + +- **Awards**: + - Employee of the Quarter Q4 2022 + +- **Interests**: + - Open-source iOS community, mountain biking. 
+ +- **Feedback from HR**: + - Alex is highly reliable, innovative, and a technical leader on the iOS team. diff --git a/week5/community-contributions/ruby_rag_console_chat_app/knowledge_base/employees/emma_clark.md b/week5/community-contributions/ruby_rag_console_chat_app/knowledge_base/employees/emma_clark.md new file mode 100644 index 0000000..f430761 --- /dev/null +++ b/week5/community-contributions/ruby_rag_console_chat_app/knowledge_base/employees/emma_clark.md @@ -0,0 +1,36 @@ +# HR Record + +# Emma Clark + +## Summary +- **Date of Birth**: December 1, 1994 +- **Job Title**: Junior iOS Developer +- **Location**: Remote (Barcelona, Spain) + +## Career Progression +- **January 2023**: Joined TechNova Solutions as Junior iOS Developer. +- **June 2023**: Assisted in feature development for client app projects. +- **September 2023**: Completed SwiftUI fundamentals training. + +## Annual Performance History +- **2023**: + - **Performance Rating**: 4.3/5 + - **Key Achievements**: Successfully delivered bug fixes that reduced crash reports by 20%. + +## Compensation History +- **2023**: + - **Base Salary**: €45,000 + - **Bonus**: €2,000 + +## Other HR Notes +- **Training Completed**: + - SwiftUI Fundamentals (2023) + +- **Awards**: + - Rookie of the Month (August 2023) + +- **Interests**: + - Mobile UI design, digital painting. + +- **Feedback from HR**: + - Emma is eager to learn and shows promising potential in iOS development. 
diff --git a/week5/community-contributions/ruby_rag_console_chat_app/knowledge_base/employees/li_wang.md b/week5/community-contributions/ruby_rag_console_chat_app/knowledge_base/employees/li_wang.md new file mode 100644 index 0000000..f916bd6 --- /dev/null +++ b/week5/community-contributions/ruby_rag_console_chat_app/knowledge_base/employees/li_wang.md @@ -0,0 +1,54 @@ +# HR Record + +# Li Wang + +## Summary +- **Date of Birth**: November 3, 1991 +- **Job Title**: Machine Learning Researcher +- **Location**: Berlin, Germany + +## Career Progression +- **September 2016**: Joined as ML Research Intern focusing on NLP models. +- **March 2018**: Promoted to ML Researcher, published 3 papers in AI conferences. +- **July 2021**: Led development of custom ML pipeline for predictive analytics client project. +- **January 2024**: Mentoring new ML engineers and collaborating cross-functionally. + +## Annual Performance History +- **2021**: + - **Performance Rating**: 4.8/5 + - **Key Achievements**: Developed novel transformer model adaptation improving accuracy by 12%. + +- **2022**: + - **Performance Rating**: 4.9/5 + - **Key Achievements**: Successfully delivered a computer vision system automating quality inspection. + +- **2023**: + - **Performance Rating**: 4.9/5 + - **Key Achievements**: Published an internal white paper on explainable AI. + +## Compensation History +- **2021**: + - **Base Salary**: €68,000 + - **Bonus**: €7,500 + +- **2022**: + - **Base Salary**: €75,000 + - **Bonus**: €9,000 + +- **2023**: + - **Base Salary**: €80,000 + - **Bonus**: €11,000 + +## Other HR Notes +- **Training Completed**: + - AI Model Interpretability Workshop (2022) + - TensorFlow Advanced Techniques (2023) + +- **Awards**: + - Best Paper Award, AI Conference 2022 + +- **Interests**: + - AI ethics, chess. + +- **Feedback from HR**: + - Li is a deep thinker and innovator, key to the ML capabilities of TechNova. 
diff --git a/week5/community-contributions/ruby_rag_console_chat_app/knowledge_base/employees/oliver_smith.md b/week5/community-contributions/ruby_rag_console_chat_app/knowledge_base/employees/oliver_smith.md new file mode 100644 index 0000000..3fdd870 --- /dev/null +++ b/week5/community-contributions/ruby_rag_console_chat_app/knowledge_base/employees/oliver_smith.md @@ -0,0 +1,54 @@ +# HR Record + +# Oliver Smith + +## Summary +- **Date of Birth**: February 14, 1989 +- **Job Title**: Fullstack Engineer +- **Location**: Berlin, Germany + +## Career Progression +- **May 2013**: Joined TechNova as Junior Web Developer. +- **December 2016**: Became Fullstack Engineer handling Node.js backend and iOS frontend integration. +- **September 2019**: Led cross-platform product integration projects. +- **July 2023**: Leads DevOps initiatives for CI/CD pipeline improvements. + +## Annual Performance History +- **2021**: + - **Performance Rating**: 4.5/5 + - **Key Achievements**: Automated deployment pipelines improving release frequency by 25%. + +- **2022**: + - **Performance Rating**: 4.7/5 + - **Key Achievements**: Improved API response times by 35%. + +- **2023**: + - **Performance Rating**: 4.8/5 + - **Key Achievements**: Led migration to containerized microservices architecture. + +## Compensation History +- **2021**: + - **Base Salary**: €70,000 + - **Bonus**: €7,000 + +- **2022**: + - **Base Salary**: €75,000 + - **Bonus**: €8,000 + +- **2023**: + - **Base Salary**: €80,000 + - **Bonus**: €9,000 + +## Other HR Notes +- **Training Completed**: + - Kubernetes & Docker Training (2022) + - Advanced DevOps Practices (2023) + +- **Awards**: + - TechNova Team Player Award 2023 + +- **Interests**: + - Open-source contributor, drone photography. + +- **Feedback from HR**: + - Oliver’s versatile skill set bridges frontend, backend, and infrastructure seamlessly. 
diff --git a/week5/community-contributions/ruby_rag_console_chat_app/knowledge_base/employees/sophia_martinez.md b/week5/community-contributions/ruby_rag_console_chat_app/knowledge_base/employees/sophia_martinez.md new file mode 100644 index 0000000..4d4a0bf --- /dev/null +++ b/week5/community-contributions/ruby_rag_console_chat_app/knowledge_base/employees/sophia_martinez.md @@ -0,0 +1,54 @@ +# HR Record + +# Sophia Martinez + +## Summary +- **Date of Birth**: August 22, 1988 +- **Job Title**: Senior Web Developer +- **Location**: Remote (Madrid, Spain) + +## Career Progression +- **January 2014**: Started as Junior Frontend Developer working on React projects. +- **June 2016**: Promoted to Web Developer handling full-stack responsibilities with Node.js backend. +- **May 2019**: Led e-commerce platform redeployment for major client. +- **February 2023**: Became Senior Web Developer and technical team lead. + +## Annual Performance History +- **2021**: + - **Performance Rating**: 4.7/5 + - **Key Achievements**: Spearheaded transition to microservices architecture. + +- **2022**: + - **Performance Rating**: 4.9/5 + - **Key Achievements**: Delivered a complex multi-tenant SaaS platform on time. + +- **2023**: + - **Performance Rating**: 5.0/5 + - **Key Achievements**: Implemented comprehensive CI/CD pipelines improving deployment speed by 40%. + +## Compensation History +- **2021**: + - **Base Salary**: €72,000 + - **Bonus**: €8,500 + +- **2022**: + - **Base Salary**: €78,000 + - **Bonus**: €10,000 + +- **2023**: + - **Base Salary**: €85,000 + - **Bonus**: €12,000 + +## Other HR Notes +- **Training Completed**: + - Kubernetes Administration (2022) + - Advanced React Patterns (2023) + +- **Awards**: + - TechNova Innovation Award 2023 + +- **Interests**: + - Technical blogging, hiking. + +- **Feedback from HR**: + - Sophia is a strategic thinker and a force-multiplier for the web team. 
diff --git a/week5/community-contributions/ruby_rag_console_chat_app/knowledge_base/products/ios_app_dev.md b/week5/community-contributions/ruby_rag_console_chat_app/knowledge_base/products/ios_app_dev.md new file mode 100644 index 0000000..95f1cf9 --- /dev/null +++ b/week5/community-contributions/ruby_rag_console_chat_app/knowledge_base/products/ios_app_dev.md @@ -0,0 +1,29 @@ +# iOS App Development Services + +## Overview +TechNova Solutions offers custom native iOS app development using the latest Swift and SwiftUI technologies to build performant, user-friendly applications for iPhone and iPad. + +## Key Features +- Native UI/UX design optimized for iOS platform +- Integration with Apple services: HealthKit, Push Notifications, Siri Shortcuts +- Robust authentication and security features +- App Store submission support and maintenance +- Continuous updates for compatibility with the latest iOS versions + +## Typical Use Cases +- Consumer mobile apps +- Enterprise tools and productivity apps +- E-commerce and retail apps +- Fitness and wellness tracking + +## Technologies +- Swift & SwiftUI +- Combine for reactive programming +- Core Data and CloudKit for persistence +- XCTest for automated testing + +## Benefits +- Faster time-to-market with modern workflows +- High app performance and responsiveness +- Access to native hardware capabilities +- Dedicated support throughout app lifecycle diff --git a/week5/community-contributions/ruby_rag_console_chat_app/knowledge_base/products/ml_solutions.md b/week5/community-contributions/ruby_rag_console_chat_app/knowledge_base/products/ml_solutions.md new file mode 100644 index 0000000..4e00211 --- /dev/null +++ b/week5/community-contributions/ruby_rag_console_chat_app/knowledge_base/products/ml_solutions.md @@ -0,0 +1,36 @@ +# Machine Learning Solutions + +## Overview +TechNova Solutions delivers custom machine learning models and AI systems to help businesses unlock insights, automate processes, and drive innovation. 
+ +## Key Capabilities +- Predictive analytics and forecasting +- Natural Language Processing (NLP) for text analysis and chatbots +- Computer vision for image processing and quality control +- Recommendation engines and personalization +- Automated data preprocessing and feature engineering + +## Project Workflow +1. Data collection and cleaning +2. Model development and training +3. Evaluation and performance tuning +4. Deployment and integration +5. Ongoing monitoring and updates + +## Typical Applications +- Customer behavior prediction +- Fraud detection +- Medical diagnostics and image analysis +- Automated content moderation +- Supply chain optimization + +## Technologies +- Python, TensorFlow, PyTorch +- Scikit-learn, Pandas, NumPy +- Cloud ML platforms (AWS SageMaker, GCP AI Platform) + +## Benefits +- Data-driven decision making +- Process automation and efficiency +- Enhanced customer experience through personalization +- Competitive advantage via AI innovation diff --git a/week5/community-contributions/ruby_rag_console_chat_app/knowledge_base/products/web_dev_services.md b/week5/community-contributions/ruby_rag_console_chat_app/knowledge_base/products/web_dev_services.md new file mode 100644 index 0000000..72c17aa --- /dev/null +++ b/week5/community-contributions/ruby_rag_console_chat_app/knowledge_base/products/web_dev_services.md @@ -0,0 +1,37 @@ +# Web Development Services + +## Overview +Our full-stack web development solutions provide scalable, responsive, and secure web applications tailored to diverse business needs, from startups to enterprises. 
+ +## Core Services +- Frontend development using React.js, Vue.js, or Angular +- Backend APIs and microservices with Node.js, Express, and PostgreSQL +- E-commerce platform design and integration +- Content management system (CMS) development +- Cloud deployment and DevOps automation + +## Features +- Mobile-first responsive design +- Secure authentication and role-based access +- API integrations with third-party services +- Real-time data synchronization and notifications +- Scalable architecture for high-traffic applications + +## Typical Clients +- SaaS businesses +- Online retailers and marketplaces +- Media and content platforms +- Financial and healthcare organizations + +## Technologies +- JavaScript/TypeScript +- React, Vue, Angular +- Node.js, Express +- PostgreSQL, MongoDB +- Docker, Kubernetes + +## Benefits +- Custom solutions built for your unique business challenges +- Improved user engagement and retention +- Scalable systems ready for growth +- Expert support and maintenance diff --git a/week5/community-contributions/ruby_rag_console_chat_app/main.rb b/week5/community-contributions/ruby_rag_console_chat_app/main.rb new file mode 100644 index 0000000..d7dab37 --- /dev/null +++ b/week5/community-contributions/ruby_rag_console_chat_app/main.rb @@ -0,0 +1,68 @@ +require_relative 'initializer' + +chat_history = [ + { role: "system", content: "You are helpful assistant. If you do not know the answer, do not make it up, just say you do not know." 
} +] +openai = OpenAI::Client.new(uri_base: 'http://localhost:11434/v1', access_token: 'ollama') # LLaMa locally + +def get_embeddings(text, client) + response = client.embeddings(parameters: { model: "llama3.2", input: text }) + response["data"].first["embedding"] +end + +def build_prompt(chat_history:, context_chunks:, user_message:) + context_text = context_chunks.join("\n---\n") + + <<~PROMPT + Here is an additional context from our knowledge base: + #{context_text} + + New question from a user: + User: #{user_message} + + Please answer using the context above. + PROMPT +end + +collection_name = "my_collection" +collection = Chroma::Resources::Collection.get(collection_name) + +puts "Welcome to our little experiment!" + +while true + user_message = gets.chomp + + if user_message == 'exit' + puts "Ending the session" + exit 0 + end + + query_embedding = get_embeddings(user_message, openai) + results = collection.query(query_embeddings: [query_embedding], results: 15) + retrieved_chunks = results.map(&:document) + + prompt = build_prompt( + chat_history: chat_history, + context_chunks: retrieved_chunks, + user_message: user_message + ) + chat_history << { role: "user", content: prompt } + + response_message = "" + openai.chat(parameters: { + model: "llama3.2", + messages: chat_history, + stream: proc { |chunk, _bytesize| + delta = chunk.dig("choices", 0, "delta", "content") + next unless delta + print delta + response_message << delta + } + }) + chat_history << { role: "assistant", content: response_message } + puts +end + + + + diff --git a/week5/community-contributions/ruby_rag_console_chat_app/seed.rb b/week5/community-contributions/ruby_rag_console_chat_app/seed.rb new file mode 100644 index 0000000..d509b7d --- /dev/null +++ b/week5/community-contributions/ruby_rag_console_chat_app/seed.rb @@ -0,0 +1,129 @@ +require_relative 'initializer' +require 'pragmatic_segmenter' + +folders = Dir.glob("knowledge_base/*").select { |f| File.directory?(f) } + +documents = 
[]

# Load every markdown file under each doc-type folder, keeping the path and
# doc_type so retrieved chunks can be traced back to their source document.
folders.each do |folder|
  doc_type = File.basename(folder)
  Dir.glob("#{folder}/**/*.md").each do |filepath|
    content = File.read(filepath, encoding: 'utf-8')
    doc = {
      content: content,
      path: filepath,
      metadata: { "doc_type" => doc_type }
    }
    documents << doc
  end
end

# Naive fixed-size splitter: slices `text` into chunk_size-character pieces,
# each overlapping the previous one by chunk_overlap characters.
# chunk_size and chunk_overlap are configurable for getting better results
def split_text(text, chunk_size: 1000, chunk_overlap: 200)
  chunks = []
  start = 0
  while start < text.length
    finish = [start + chunk_size, text.length].min
    chunks << text[start...finish]
    break if finish == text.length
    start += (chunk_size - chunk_overlap)
  end
  chunks
end

# Sentence-aware splitter: packs whole sentences into chunks of at most
# chunk_size characters, carrying the last chunk_overlap characters of a
# finished chunk into the next one for context continuity.
def split_text_by_sentence(text, chunk_size: 1500, chunk_overlap: 200)
  sentences = PragmaticSegmenter::Segmenter.new(text: text).segment
  chunks = []
  current_chunk = ""
  sentences.each do |sentence|
    if (current_chunk + sentence).length > chunk_size
      chunks << current_chunk.strip
      # For overlap, take last N chars from current_chunk (optionally at sentence boundary)
      overlap = current_chunk[-chunk_overlap..-1] || ""
      current_chunk = overlap + sentence
    else
      current_chunk += " " unless current_chunk.empty?
      current_chunk += sentence
    end
  end
  chunks << current_chunk.strip unless current_chunk.empty?
  chunks
end

chunks = []

# Alternative: character-based chunking, kept for experimentation.
# documents.each do |doc|
#   split_text(doc[:content]).each_with_index do |chunk, idx|
#     chunks << {
#       content: chunk,
#       metadata: doc[:metadata].merge({ "chunk_index" => idx, "path" => doc[:path] })
#     }
#   end
# end

documents.each do |doc|
  split_text_by_sentence(doc[:content]).each_with_index do |chunk, idx|
    chunks << {
      content: chunk,
      metadata: doc[:metadata].merge({ "chunk_index" => idx, "path" => doc[:path] })
    }
  end
end

puts "Chunks count: #{chunks.count}"
puts "Document types found: #{chunks.map { _1[:metadata]['doc_type'] }.uniq.join(', ')}"

# 1.
# Set up OpenAI client (replace with RubyLLM or HTTP if using HuggingFace)
# openai = OpenAI::Client.new(access_token: ENV['OPENAI_API_KEY']) # OpenAI API, remotely
openai = OpenAI::Client.new(uri_base: 'http://localhost:11434/v1', access_token: 'ollama') # LLaMa, locally

# 2. Get embeddings for each chunk
# Embed `text` with the local model; must match the model used by main.rb so
# query vectors live in the same space as the stored document vectors.
def get_embedding(text, client)
  response = client.embeddings(parameters: { model: "llama3.2", input: text })
  response["data"].first["embedding"]
end

# Check current Chroma server version
version = Chroma::Resources::Database.version
puts version

collection_name = "my_collection"
collection = begin
  Chroma::Resources::Collection.get(collection_name)
rescue Chroma::APIError
  # No collection yet - we will create it below.
  nil
end

if collection
  puts "Collection already exists"
  puts "Do you want to reset it? (y/n)"
  answer = gets&.chomp
  if answer == 'y'
    # Drop the stale collection and fall through to recreate and reseed it,
    # so a reset no longer requires running the script twice.
    Chroma::Resources::Collection.delete(collection_name)
    puts 'Collection deleted'
    collection = nil
  else
    # Re-adding chunks with the same ids to an existing collection would
    # collide, so leave it untouched and stop here.
    puts 'Keeping the existing collection'
    exit 0
  end
end

puts "Creating collection - #{collection_name}"
collection = Chroma::Resources::Collection.create(collection_name, { lang: "ruby" })

# 3. Embed every chunk
chunks.each do |chunk|
  chunk[:embedding] = get_embedding(chunk[:content], openai)
end

# 4. Insert into Chroma
embeddings = chunks.each_with_index.map do |chunk, idx|
  Chroma::Resources::Embedding.new(
    id: "chunk-#{idx}",
    embedding: chunk[:embedding],
    metadata: chunk[:metadata],
    document: chunk[:content]
  )
end

collection.add(embeddings)
puts "Vectorstore created with #{embeddings.size} documents"

# Now `chunks` is an array of hashes with :content, :metadata and :embedding keys.