Add juan contribution

This commit is contained in:
Jsrodrigue
2025-10-23 15:29:54 +01:00
parent a1a9bc0f95
commit 101b0baf62
18 changed files with 1426 additions and 0 deletions

View File

@@ -0,0 +1,14 @@
import hashlib
import pandas as pd
def hash_row(row: pd.Series) -> str:
"""Compute MD5 hash for a row to detect duplicates."""
return hashlib.md5(str(tuple(row)).encode()).hexdigest()
def sample_reference(reference_df: pd.DataFrame, n_reference_rows: int) -> list:
"""Return a fresh sample of reference data for batch generation."""
if reference_df is not None and not reference_df.empty:
sample_df = reference_df.sample(min(n_reference_rows, len(reference_df)), replace=False)
return sample_df.to_dict(orient="records")
return []