Fixed problem with colons in filenames and added more of week 6

2024-09-18 06:36:48 -04:00
parent 21afbd6c73
commit 1920e76510
8 changed files with 8501 additions and 715 deletions
--- a/week5/knowledge-base/contracts/Contract
+++ b/week5/knowledge-base/contracts/Contract
--- a/week5/knowledge-base/contracts/Contract
+++ b/week5/knowledge-base/contracts/Contract
--- a/week6/day3.ipynb
+++ b/week6/day3.ipynb
--- a/week6/day4-results.ipynb
+++ b/week6/day4-results.ipynb
--- a/week6/day4.ipynb
+++ b/week6/day4.ipynb
--- a/week6/human_input.csv
+++ b/week6/human_input.csv
--- a/week6/human_output.csv
+++ b/week6/human_output.csv
--- a/week6/testing.py
+++ b/week6/testing.py
@@ -0,0 +1,75 @@
+import math
+import matplotlib.pyplot as plt
+
+# ANSI escape sequences used to colour the per-item lines printed to the terminal.
+GREEN = "\033[92m"
+YELLOW = "\033[93m"
+RED = "\033[91m"
+# Emitted at the end of each printed line to switch the colour back off.
+RESET = "\033[0m"
+# Maps the colour names returned by Tester.color_for to the escape sequence
+# used when printing; note that "orange" is rendered with the YELLOW code.
+COLOR_MAP = {"red":RED, "orange": YELLOW, "green": GREEN}
+
+class Tester:
+
+    def __init__(self, predictor, data, title=None, size=250):
+        self.predictor = predictor
+        self.data = data
+        self.title = title or predictor.__name__.replace("_", " ").title()
+        self.size = size
+        self.guesses = []
+        self.truths = []
+        self.errors = []
+        self.sles = []
+        self.colors = []
+
+    def color_for(self, error, truth):
+        if error<40 or error/truth < 0.2:
+            return "green"
+        elif error<80 or error/truth < 0.4:
+            return "orange"
+        else:
+            return "red"
+    
+    def run_datapoint(self, i):
+        datapoint = self.data[i]
+        guess = self.predictor(datapoint)
+        truth = datapoint.price
+        error = abs(guess - truth)
+        log_error = math.log(truth+1) - math.log(guess+1)
+        sle = log_error ** 2
+        color = self.color_for(error, truth)
+        title = datapoint.title if len(datapoint.title) <= 40 else datapoint.title[:40]+"..."
+        self.guesses.append(guess)
+        self.truths.append(truth)
+        self.errors.append(error)
+        self.sles.append(sle)
+        self.colors.append(color)
+        print(f"{COLOR_MAP[color]}{i+1}: Guess: ${guess:,.2f} Truth: ${truth:,.2f} Error: ${error:,.2f} SLE: {sle:,.2f} Item: {title}{RESET}")
+
+    def chart(self, title):
+        max_error = max(self.errors)
+        plt.figure(figsize=(12, 8))
+        max_val = max(max(self.truths), max(self.guesses))
+        plt.plot([0, max_val], [0, max_val], color='deepskyblue', lw=2, alpha=0.6)
+        plt.scatter(self.truths, self.guesses, s=3, c=self.colors)
+        plt.xlabel('Ground Truth')
+        plt.ylabel('Model Estimate')
+        plt.xlim(0, max_val)
+        plt.ylim(0, max_val)
+        plt.title(title)
+        plt.show()
+
+    def report(self):
+        average_error = sum(self.errors) / self.size
+        rmsle = math.sqrt(sum(self.sles) / self.size)
+        hits = sum(1 for color in self.colors if color=="green")
+        title = f"{self.title} Error=${average_error:,.2f} RMSLE={rmsle:,.2f} Hits={hits/self.size*100:.1f}%"
+        self.chart(title)
+
+    def run(self):
+        self.error = 0
+        for i in range(self.size):
+            self.run_datapoint(i)
+        self.report()
+
+    @classmethod
+    def test(cls, function, data):
+        cls(function, data).run()