diff --git a/makefile b/makefile
index a3f575b..0fe435d 100644
--- a/makefile
+++ b/makefile
@@ -36,10 +36,18 @@ import:
 	@until docker exec -i $(PG_CONTAINER) pg_isready -U $(PG_USER) -d $(PG_DB); do \
 		sleep 2; \
 	done
+	@echo "🧹 Cleaning CSV files before import (local, in-place)..."
+	@python3 scripts/clean_csv.py || { echo "❌ CSV cleaning failed - fix the files before importing"; exit 1; }
 	@echo "📥 Import des données de démo..."
 	@docker exec -i $(PG_CONTAINER) psql -U $(PG_USER) -d $(PG_DB) -f $(IMPORT_SQL)
 	@echo "✅ Données importées."
 
+# Dev convenience target; crypt()/gen_salt() require the pgcrypto extension.
+set-dev-admin-pw:
+	@echo "🔐 Setting dev password for admin@ptits-pas.fr (admin123)"
+	@docker exec -i $(PG_CONTAINER) psql -U $(PG_USER) -d $(PG_DB) -c "UPDATE utilisateurs SET password = crypt('admin123', gen_salt('bf')) WHERE email = 'admin@ptits-pas.fr';"
+	@echo "✅ Dev admin password updated."
+
 verify:
 	@echo "⏳ Attente que Postgres démarre pour vérifier..."
 	@until docker exec -i $(PG_CONTAINER) pg_isready -U $(PG_USER) -d $(PG_DB); do \
diff --git a/scripts/clean_csv.py b/scripts/clean_csv.py
index 33683c6..9f1b01a 100755
--- a/scripts/clean_csv.py
+++ b/scripts/clean_csv.py
@@ -1,59 +1,133 @@
 #!/usr/bin/env python3
+"""scripts/clean_csv.py
+
+Usage:
+    python3 scripts/clean_csv.py [input_dir] [--out-dir OUT] [--dry-run]
+
+This script cleans CSV files (trim, remove 'NULL', fix column count) and
+performs simple validations (UUID-looking columns, date columns).
+"""
 import csv
 import sys
 from pathlib import Path
+from datetime import datetime
+from typing import Optional
+import argparse
+import re
 
-def clean_csv_file(file_path: Path):
-    """
-    Nettoie un CSV directement en place :
-    - remplace "NULL" par ""
-    - supprime les espaces parasites
-    - force le même nombre de colonnes que l'en-tête
-    """
+# Canonical UUID shape: 8-4-4-4-12 hex digits
+UUID_RE = re.compile(
+    r'^[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}$')
+
+
+def is_uuid(s: str) -> bool:
+    return bool(UUID_RE.match(s))
+
+
+def looks_like_date(s: str) -> bool:
+    if not s:
+        return False
+    # ISO-like detection; note that before Python 3.11, fromisoformat()
+    # rejects some valid ISO 8601 strings (e.g. a trailing 'Z')
+    try:
+        datetime.fromisoformat(s)
+        return True
+    except ValueError:
+        return False
+
+
+def clean_csv_file(file_path: Path, out_path: Optional[Path] = None, dry_run: bool = False):
     cleaned_rows = []
+    errors = []
 
-    with open(file_path, "r", encoding="utf-8-sig", newline="") as infile:
+    with file_path.open("r", encoding="utf-8-sig", newline="") as infile:
         reader = csv.reader(infile)
         try:
             header = next(reader)
         except StopIteration:
-            print(f"[⚠️] Fichier vide : {file_path}")
-            return
+            return {'file': str(file_path), 'error': 'empty', 'errors': []}
 
         nb_cols = len(header)
         cleaned_rows.append([h.strip() for h in header])
 
+        # Column heuristics: id-like columns should hold UUIDs, date-like
+        # columns ISO dates ('_le' covers French names such as 'cree_le')
+        uuid_cols = [i for i, h in enumerate(header) if h.lower() == 'id' or h.lower().endswith('_id')]
+        date_cols = [i for i, h in enumerate(header) if any(k in h.lower() for k in ('date', '_le', '_at'))]
+
         for i, row in enumerate(reader, start=2):
-            # Ajuste le nombre de colonnes
             if len(row) < nb_cols:
                 row.extend([""] * (nb_cols - len(row)))
             elif len(row) > nb_cols:
                 row = row[:nb_cols]
 
             # Nettoyage cellule par cellule
-            row = [cell.strip().replace("NULL", "") for cell in row]
+            # (whole-cell match only, so values merely containing "NULL" survive)
+            row = ['' if cell.strip() == 'NULL' else cell.strip() for cell in row]
+
+            # Simple validations
+            for ci in uuid_cols:
+                if ci < len(row) and row[ci]:
+                    if not is_uuid(row[ci]):
+                        errors.append(f"Line {i}: column {header[ci]} not UUID-like: {row[ci]!r}")
+            for ci in date_cols:
+                if ci < len(row) and row[ci]:
+                    if not looks_like_date(row[ci]):
+                        errors.append(f"Line {i}: column {header[ci]} not ISO-like date: {row[ci]!r}")
+
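+            # Keep the row even if it failed validation; errors are reported at the end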
             cleaned_rows.append(row)
 
-    # Réécriture dans le même fichier
-    with open(file_path, "w", encoding="utf-8", newline="") as outfile:
-        writer = csv.writer(outfile)
-        writer.writerows(cleaned_rows)
+    # Write the cleaned rows unless --dry-run was requested
+    if not dry_run:
+        target = out_path if out_path else file_path
+        with target.open('w', encoding='utf-8', newline='') as outfile:
+            writer = csv.writer(outfile)
+            writer.writerows(cleaned_rows)
 
-    print(f"[✔] Nettoyé : {file_path}")
+    return {'file': str(file_path), 'errors': errors}
 
 
 def main():
-    if len(sys.argv) > 1:
-        base_dir = Path(sys.argv[1])
+    parser = argparse.ArgumentParser()
+    parser.add_argument('input_dir', nargs='?', default='bdd/data_test')
+    parser.add_argument('--out-dir', '-o', help='Optional output directory for cleaned files')
+    parser.add_argument('--dry-run', action='store_true', help='Do not write files, just report')
+    args = parser.parse_args()
+
+    base_dir = Path(args.input_dir)
+    out_dir = Path(args.out_dir) if args.out_dir else None
+    if out_dir:
+        out_dir.mkdir(parents=True, exist_ok=True)
+
+    results = []
+    for file_path in sorted(base_dir.glob('*.csv')):
+        out_path = out_dir / file_path.name if out_dir else None
+        res = clean_csv_file(file_path, out_path, args.dry_run)
+        results.append(res)
+
+    # Report
+    any_errors = False
+    for r in results:
+        if 'error' in r:
+            print(f"[WARN] {r['file']}: {r['error']}")
+            continue
+        if r.get('errors'):
+            any_errors = True
+            print(f"[ERR] {r['file']} ->")
+            for e in r['errors'][:20]:
+                print(' -', e)
+            if len(r['errors']) > 20:
+                print(f"  ... and {len(r['errors']) - 20} more")
+        else:
+            print(f"[OK] {r['file']}")
+
+    if any_errors:
+        print('\nSome files have validation issues. Fix them or run with --dry-run to inspect.')
+        sys.exit(2)
     else:
-        base_dir = Path("bdd/data_test")
-
-    print(f"🔎 Nettoyage des CSV dans : {base_dir}")
-    for file_path in base_dir.glob("*.csv"):
-        clean_csv_file(file_path)
-
-    print(f"✅ Terminé. Les fichiers CSV ont été écrasés (nettoyés en place).")
+        print('\nAll files cleaned successfully.')
 
 
-if __name__ == "__main__":
+if __name__ == '__main__':
     main()
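
For reference, the new knobs can be exercised locally like this (the /tmp path
is just an example; the default input directory comes from the script above):

    python3 scripts/clean_csv.py bdd/data_test --dry-run   # report issues, write nothing
    python3 scripts/clean_csv.py -o /tmp/csv_cleaned       # keep originals, write cleaned copies
    make set-dev-admin-pw                                  # dev-only admin password reset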