Fixed problem with colons in filenames and added more of week 6

2024-09-18 06:36:48 -04:00
parent 21afbd6c73
commit 1920e76510
8 changed files with 8501 additions and 715 deletions
--- a/week5/knowledge-base/contracts/Contract
+++ b/week5/knowledge-base/contracts/Contract
--- a/week5/knowledge-base/contracts/Contract
+++ b/week5/knowledge-base/contracts/Contract
--- a/week6/day3.ipynb
+++ b/week6/day3.ipynb
--- a/week6/day4-results.ipynb
+++ b/week6/day4-results.ipynb
--- a/week6/day4.ipynb
+++ b/week6/day4.ipynb
--- a/week6/human_input.csv
+++ b/week6/human_input.csv
--- a/week6/human_output.csv
+++ b/week6/human_output.csv
--- a/week6/testing.py
+++ b/week6/testing.py
@@ -0,0 +1,75 @@
 import math
 import matplotlib.pyplot as plt
 # ANSI escape sequences used to colour the per-datapoint console output.
 GREEN = "\033[92m"
 YELLOW = "\033[93m"
 RED = "\033[91m"
 RESET = "\033[0m"  # emitted after each coloured line to end the colouring

 # Translates the colour names produced by Tester.color_for into escape codes.
 # "orange" is rendered with the yellow escape sequence.
 COLOR_MAP = {
     "red": RED,
     "orange": YELLOW,
     "green": GREEN,
 }
 class Tester:
    """Run a price predictor over a dataset, print each result and chart them.

    ``predictor`` is called with one datapoint at a time and must return a
    number. ``data`` is indexed with integers, and each datapoint must expose
    ``price`` (the ground truth) and ``title`` (a string used in the printout).

    Args:
        predictor: Callable mapping a datapoint to a numeric guess.
        data: Indexable, sized collection of datapoints.
        title: Chart title prefix; defaults to a prettified predictor name.
        size: Maximum number of datapoints to evaluate, starting at index 0.
    """

    def __init__(self, predictor, data, title=None, size=250):
        self.predictor = predictor
        self.data = data
        # Callables such as functools.partial objects or instances defining
        # __call__ carry no __name__, so fall back to the name of their type.
        name = getattr(predictor, "__name__", None) or type(predictor).__name__
        self.title = title or name.replace("_", " ").title()
        self.size = size
        self._reset()

    def _reset(self):
        """Clear all accumulated per-datapoint results."""
        self.guesses = []
        self.truths = []
        self.errors = []
        self.sles = []
        self.colors = []

    def color_for(self, error, truth):
        """Classify a result as "green", "orange" or "red".

        A result is green when the absolute error is below 40 or the relative
        error is below 20%, orange when below 80 or 40%, and red otherwise.
        """
        # Relative error is undefined for a zero truth; treating it as
        # infinite means only the absolute thresholds apply in that case.
        ratio = error / truth if truth else math.inf
        if error < 40 or ratio < 0.2:
            return "green"
        if error < 80 or ratio < 0.4:
            return "orange"
        return "red"

    def run_datapoint(self, i):
        """Evaluate datapoint ``i``, record its metrics and print a coloured line."""
        datapoint = self.data[i]
        guess = self.predictor(datapoint)
        truth = datapoint.price
        error = abs(guess - truth)
        # log(x + 1) is undefined for x <= -1, so values below zero are
        # treated as zero when computing the squared log error.
        log_error = math.log(max(truth, 0) + 1) - math.log(max(guess, 0) + 1)
        sle = log_error ** 2
        color = self.color_for(error, truth)
        # Keep the printed line short: cut long item titles at 40 characters.
        title = datapoint.title if len(datapoint.title) <= 40 else datapoint.title[:40]+"..."
        self.guesses.append(guess)
        self.truths.append(truth)
        self.errors.append(error)
        self.sles.append(sle)
        self.colors.append(color)
        print(f"{COLOR_MAP[color]}{i+1}: Guess: ${guess:,.2f} Truth: ${truth:,.2f} Error: ${error:,.2f} SLE: {sle:,.2f} Item: {title}{RESET}")

    def chart(self, title):
        """Scatter-plot guesses against truths, coloured by classification."""
        plt.figure(figsize=(12, 8))
        max_val = max(max(self.truths), max(self.guesses))
        # Diagonal reference line: points on it are perfect predictions.
        plt.plot([0, max_val], [0, max_val], color='deepskyblue', lw=2, alpha=0.6)
        plt.scatter(self.truths, self.guesses, s=3, c=self.colors)
        plt.xlabel('Ground Truth')
        plt.ylabel('Model Estimate')
        plt.xlim(0, max_val)
        plt.ylim(0, max_val)
        plt.title(title)
        plt.show()

    def report(self):
        """Summarise the recorded results in a chart title and show the chart.

        The title carries the mean absolute error, the root mean squared log
        error and the percentage of "green" results.
        """
        # Average over the results actually recorded rather than the requested
        # size, so the figures stay correct when fewer datapoints were run.
        count = len(self.errors)
        if not count:
            print("No datapoints were evaluated; nothing to report.")
            return
        average_error = sum(self.errors) / count
        rmsle = math.sqrt(sum(self.sles) / count)
        hits = sum(1 for color in self.colors if color == "green")
        title = f"{self.title} Error=${average_error:,.2f} RMSLE={rmsle:,.2f} Hits={hits/count*100:.1f}%"
        self.chart(title)

    def run(self):
        """Evaluate up to ``size`` datapoints from index 0, then report."""
        self.error = 0  # not read by this class; kept for backward compatibility
        # Start from a clean slate so repeated runs do not mix their results.
        self._reset()
        # Never index past the end of the data when it is shorter than size.
        for i in range(min(self.size, len(self.data))):
            self.run_datapoint(i)
        self.report()

    @classmethod
    def test(cls, function, data, title=None, size=250):
        """Build a Tester for ``function`` over ``data`` and run it immediately."""
        cls(function, data, title=title, size=size).run()