Add juan contribution
This commit is contained in:
@@ -0,0 +1,14 @@
|
||||
import hashlib
|
||||
import pandas as pd
|
||||
|
||||
def hash_row(row: pd.Series) -> str:
|
||||
"""Compute MD5 hash for a row to detect duplicates."""
|
||||
return hashlib.md5(str(tuple(row)).encode()).hexdigest()
|
||||
|
||||
|
||||
def sample_reference(reference_df: pd.DataFrame, n_reference_rows: int) -> list:
|
||||
"""Return a fresh sample of reference data for batch generation."""
|
||||
if reference_df is not None and not reference_df.empty:
|
||||
sample_df = reference_df.sample(min(n_reference_rows, len(reference_df)), replace=False)
|
||||
return sample_df.to_dict(orient="records")
|
||||
return []
|
||||
Reference in New Issue
Block a user