Update the CSV cleaning script and the Makefile

951095 committed 2025-09-19 12:44:06 +02:00
parent 55d4b4ff85
commit eea1bd8c4b
2 changed files with 99 additions and 27 deletions

Makefile

@@ -36,10 +36,18 @@ import:
	@until docker exec -i $(PG_CONTAINER) pg_isready -U $(PG_USER) -d $(PG_DB); do \
		sleep 2; \
	done
	@echo "🧹 Cleaning the CSVs before import (local -> in-place)..."
	@python3 scripts/clean_csv.py || { echo "❌ Errors while cleaning the CSVs - fix the files before importing"; exit 1; }
	@echo "📥 Importing demo data..."
	@docker exec -i $(PG_CONTAINER) psql -U $(PG_USER) -d $(PG_DB) -f $(IMPORT_SQL)
	@echo "✅ Data imported."

set-dev-admin-pw:
	@echo "🔐 Setting dev password for admin@ptits-pas.fr (admin123)"
	@docker exec -i $(PG_CONTAINER) psql -U $(PG_USER) -d $(PG_DB) -c "UPDATE utilisateurs SET password = crypt('admin123', gen_salt('bf')) WHERE email = 'admin@ptits-pas.fr';"
	@echo "✅ Dev admin password updated."

verify:
	@echo "⏳ Waiting for Postgres to be up before verifying..."
	@until docker exec -i $(PG_CONTAINER) pg_isready -U $(PG_USER) -d $(PG_DB); do \
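The new cleaning step gates the SQL import on the cleaner's exit status: if scripts/clean_csv.py exits nonzero, psql never runs. A minimal Python sketch of the same gate, for illustration only - the variable values below are placeholders, not the Makefile's real ones:

import subprocess
import sys

# Placeholder values; the Makefile defines the real PG_CONTAINER / PG_USER / PG_DB / IMPORT_SQL.
PG_CONTAINER = "pp_postgres"
PG_USER = "postgres"
PG_DB = "ptits_pas"
IMPORT_SQL = "/docker-entrypoint-initdb.d/import.sql"

# Step 1: clean the CSVs; a nonzero exit (2 on validation errors) aborts the import.
clean = subprocess.run([sys.executable, "scripts/clean_csv.py"])
if clean.returncode != 0:
    sys.exit("CSV cleaning failed - fix the files before importing")

# Step 2: mirror of `docker exec -i $(PG_CONTAINER) psql -U $(PG_USER) -d $(PG_DB) -f $(IMPORT_SQL)`.
subprocess.run(
    ["docker", "exec", "-i", PG_CONTAINER,
     "psql", "-U", PG_USER, "-d", PG_DB, "-f", IMPORT_SQL],
    check=True,
)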

scripts/clean_csv.py

@@ -1,59 +1,123 @@
#!/usr/bin/env python3
"""scripts/clean_csv.py
Usage:
python3 scripts/clean_csv.py [input_dir] [--out-dir OUT] [--dry-run]
This script cleans CSV files (trim, remove 'NULL', fix column count) and
performs simple validations (UUID-looking columns, date columns).
"""
import csv
import sys
from pathlib import Path
from datetime import datetime
import argparse
import re

UUID_RE = re.compile(r'^[0-9a-fA-F-]{36}$')  # loose check: length and charset only, not the 8-4-4-4-12 grouping

def is_uuid(s: str) -> bool:
    return bool(UUID_RE.match(s))

def looks_like_date(s: str) -> bool:
    if not s:
        return False
    # ISO-like detection: accepts '2025-09-19' as well as '2025-09-19T12:44:06'
    try:
        datetime.fromisoformat(s)
        return True
    except Exception:
        return False

def clean_csv_file(file_path: Path, out_path: Path = None, dry_run: bool = False):
    """
    Clean one CSV file:
    - replace "NULL" cells with ""
    - strip stray whitespace
    - force every row to the header's column count
    Rewrites the file in place unless out_path is given; with dry_run, only reports.
    """
    cleaned_rows = []
    errors = []
    with file_path.open("r", encoding="utf-8-sig", newline="") as infile:
        reader = csv.reader(infile)
        try:
            header = next(reader)
        except StopIteration:
            return {'file': str(file_path), 'error': 'empty', 'errors': []}
        nb_cols = len(header)
        cleaned_rows.append([h.strip() for h in header])
        # Heuristics for validations: id/_id columns should hold UUIDs,
        # date/_le/_at columns should hold ISO-like dates
        uuid_cols = [i for i, h in enumerate(header) if h.lower() == 'id' or h.lower().endswith('_id')]
        date_cols = [i for i, h in enumerate(header) if any(k in h.lower() for k in ('date', '_le', '_at'))]
        for i, row in enumerate(reader, start=2):
            # Pad or truncate each row to the header's column count
            if len(row) < nb_cols:
                row.extend([""] * (nb_cols - len(row)))
            elif len(row) > nb_cols:
                row = row[:nb_cols]
            # Cell-by-cell cleanup: trim, and blank out cells that are exactly "NULL"
            row = ['' if cell.strip() == 'NULL' else cell.strip() for cell in row]
            # Simple validations
            for ci in uuid_cols:
                if ci < len(row) and row[ci] and not is_uuid(row[ci]):
                    errors.append(f"Line {i}: column {header[ci]} not UUID-like: {row[ci]!r}")
            for ci in date_cols:
                if ci < len(row) and row[ci] and not looks_like_date(row[ci]):
                    errors.append(f"Line {i}: column {header[ci]} not ISO-like date: {row[ci]!r}")
            cleaned_rows.append(row)
    # Write output unless this is a dry run
    if not dry_run:
        target = out_path if out_path else file_path
        with target.open('w', encoding='utf-8', newline='') as outfile:
            writer = csv.writer(outfile)
            writer.writerows(cleaned_rows)
        print(f"[✔] Cleaned: {target}")
    return {'file': str(file_path), 'errors': errors}
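For reference, a dry-run call against a throwaway file shows the shape of the report dict. The file, column names, and import path below are made up for this sketch, and it assumes clean_csv_file is importable with the repo root on sys.path:

from pathlib import Path
from scripts.clean_csv import clean_csv_file  # assumes the repo root is on sys.path
import os, tempfile

# One header plus one deliberately messy row: bad UUID, stray spaces, literal NULL.
fd, name = tempfile.mkstemp(suffix=".csv")
with os.fdopen(fd, "w", encoding="utf-8", newline="") as f:
    f.write("id,nom,cree_le\n")
    f.write("not-a-uuid ,  Alice ,NULL\n")

report = clean_csv_file(Path(name), dry_run=True)  # dry_run: nothing is written back
print(report["errors"])
# -> ["Line 2: column id not UUID-like: 'not-a-uuid'"]
os.unlink(name)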
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('input_dir', nargs='?', default='bdd/data_test')
    parser.add_argument('--out-dir', '-o', help='Optional output directory for cleaned files')
    parser.add_argument('--dry-run', action='store_true', help='Do not write files, just report')
    args = parser.parse_args()
    base_dir = Path(args.input_dir)
    out_dir = Path(args.out_dir) if args.out_dir else None
    if out_dir and not out_dir.exists():
        out_dir.mkdir(parents=True)
    results = []
    for file_path in sorted(base_dir.glob('*.csv')):
        out_path = out_dir / file_path.name if out_dir else None
        results.append(clean_csv_file(file_path, out_path, args.dry_run))
    # Report
    any_errors = False
    for r in results:
        if 'error' in r:
            print(f"[WARN] {r['file']}: {r['error']}")
        elif r.get('errors'):
            any_errors = True
            print(f"[ERR] {r['file']} ->")
            for e in r['errors'][:20]:
                print(' -', e)
        else:
            print(f"[OK] {r['file']}")
    if any_errors:
        print('\nSome files have validation issues. Fix them or run with --dry-run to inspect.')
        sys.exit(2)
    print('\nAll files cleaned successfully.')

if __name__ == '__main__':
    main()
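With the argparse interface above, the three modes line up as follows (the output directory name here is only an example):

    python3 scripts/clean_csv.py                          # clean bdd/data_test in place
    python3 scripts/clean_csv.py bdd/data_test --dry-run  # report problems, write nothing
    python3 scripts/clean_csv.py bdd/data_test -o /tmp/cleaned

The process exits 0 when everything is clean and 2 when validation errors were found, which is the status the Makefile's import target now checks before running psql.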