From 013083176e2796988907ec364e7f713454a74eb6 Mon Sep 17 00:00:00 2001
From: 951095 <armandbecot15@gmail.com>
Date: Tue, 23 Sep 2025 09:39:48 +0200
Subject: [PATCH] script de nettoyage des csv et securisation des variables
 d'environnement

---
 docker-compose.dev.yml |  9 +++++----
 docker-compose.yml     | 10 +++++-----
 makefile               |  5 ++---
 scripts/clean_csv.py   | 23 ++++++++++++-----------
 verify.log             | 40 ++++++++++++++++++++--------------------
 5 files changed, 44 insertions(+), 43 deletions(-)

diff --git a/docker-compose.dev.yml b/docker-compose.dev.yml
index 3f5b8ff..e32d46b 100644
--- a/docker-compose.dev.yml
+++ b/docker-compose.dev.yml
@@ -9,13 +9,14 @@ services:
     restart: unless-stopped
     environment:
       POSTGRES_USER: ${POSTGRES_USER:-admin}
-      POSTGRES_PASSWORD: ${POSTGRES_PASSWORD:-admin123}
+      POSTGRES_PASSWORD: ${POSTGRES_PASSWORD}
       POSTGRES_DB: ${POSTGRES_DB:-ptitpas_db}
     ports:
       - "5433:5432"
     volumes:
-      # Scripts de migration (ordre important → sync_enums, init, indexes, checks, triggers, import…)
-      - ./migrations/00_sync_enums.sql:/docker-entrypoint-initdb.d/00_sync_enums.sql
+      # Migration scripts (init runs first). NOTE: `00_sync_enums.sql` is intentionally
+      # NOT mounted here because it must be executed after `01_init.sql` (it alters
+      # existing enum types). The local pipeline applies it explicitly after init.
       - ./migrations/01_init.sql:/docker-entrypoint-initdb.d/01_init.sql
       - ./migrations/02_indexes.sql:/docker-entrypoint-initdb.d/02_indexes.sql
       - ./migrations/03_checks.sql:/docker-entrypoint-initdb.d/03_checks.sql
@@ -39,7 +40,7 @@ services:
     restart: unless-stopped
     environment:
       PGADMIN_DEFAULT_EMAIL: ${PGADMIN_DEFAULT_EMAIL:-admin@ptits-pas.fr}
-      PGADMIN_DEFAULT_PASSWORD: ${PGADMIN_DEFAULT_PASSWORD:-admin123}
+      PGADMIN_DEFAULT_PASSWORD: ${PGADMIN_DEFAULT_PASSWORD}
     ports:
       - "8081:80"
     depends_on:
diff --git a/docker-compose.yml b/docker-compose.yml
index d6babdc..71e7ebc 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -4,9 +4,9 @@ services:
     container_name: ynov-postgres
     restart: unless-stopped
     environment:
-      POSTGRES_USER: admin
-      POSTGRES_PASSWORD: admin123
-      POSTGRES_DB: ptitpas_db
+      POSTGRES_USER: ${POSTGRES_USER:-admin}
+      POSTGRES_PASSWORD: ${POSTGRES_PASSWORD}
+      POSTGRES_DB: ${POSTGRES_DB:-ptitpas_db}
     volumes:
       - ./migrations/01_init.sql:/docker-entrypoint-initdb.d/01_init.sql
       - postgres_data:/var/lib/postgresql/data
@@ -18,8 +18,8 @@ services:
     container_name: ynov-pgadmin
     restart: unless-stopped
     environment:
-      PGADMIN_DEFAULT_EMAIL: admin@ptits-pas.fr
-      PGADMIN_DEFAULT_PASSWORD: admin123
+      PGADMIN_DEFAULT_EMAIL: ${PGADMIN_DEFAULT_EMAIL:-admin@ptits-pas.fr}
+      PGADMIN_DEFAULT_PASSWORD: ${PGADMIN_DEFAULT_PASSWORD}
     depends_on:
       - db
     labels:
diff --git a/makefile b/makefile
index 0fe435d..447b9f3 100644
--- a/makefile
+++ b/makefile
@@ -75,12 +75,11 @@ logs:
 psql:
 	docker exec -it $(PG_CONTAINER) psql -U $(PG_USER) -d $(PG_DB)
 
-stop:
-	docker compose -f docker-compose.dev.yml down
 stop:
 	docker compose -f docker-compose.dev.yml down
 
 sync-enums:
 	@echo "🔁 Génération de migrations/00_sync_enums.sql depuis les CSV"
 	@python3 scripts/sync_enums.py > migrations/00_sync_enums.sql
-	@echo "✅ migrations/00_sync_enums.sql générée. Relis le fichier avant de l'appliquer." 
\ No newline at end of file
+	@echo "✅ migrations/00_sync_enums.sql générée. Relis le fichier avant de l'appliquer." 
+
diff --git a/scripts/clean_csv.py b/scripts/clean_csv.py
index 9f1b01a..aa5e30f 100755
--- a/scripts/clean_csv.py
+++ b/scripts/clean_csv.py
@@ -2,10 +2,11 @@
 """scripts/clean_csv.py
 
 Usage:
-  python3 scripts/clean_csv.py [input_dir] [--out-dir OUT] [--dry-run]
+    python3 scripts/clean_csv.py [input_dir] [--out-dir OUT] [--dry-run]
 
-This script cleans CSV files (trim, remove 'NULL', fix column count) and
-performs simple validations (UUID-looking columns, date columns).
+Ce script nettoie les fichiers CSV (trim, suppression de 'NULL', correction du
+nombre de colonnes) et réalise des validations simples (colonnes ressemblant
+à des UUID, colonnes date).
 """
 import csv
 import sys
@@ -46,7 +47,7 @@ def clean_csv_file(file_path: Path, out_path: Path = None, dry_run: bool = False
         nb_cols = len(header)
         cleaned_rows.append([h.strip() for h in header])
 
-        # Heuristics for validations
+        # Heuristiques pour les validations
         uuid_cols = [i for i, h in enumerate(header) if h.lower() == 'id' or h.lower().endswith('_id')]
         date_cols = [i for i, h in enumerate(header) if any(k in h.lower() for k in ('date', '_le', '_at'))]
 
@@ -63,15 +64,15 @@ def clean_csv_file(file_path: Path, out_path: Path = None, dry_run: bool = False
             for ci in uuid_cols:
                 if ci < len(row) and row[ci]:
                     if not is_uuid(row[ci]):
-                        errors.append(f"Line {i}: column {header[ci]} not UUID-like: {row[ci]!r}")
+                        errors.append(f"Ligne {i} : colonne {header[ci]} ne ressemble pas à un UUID : {row[ci]!r}")
             for ci in date_cols:
                 if ci < len(row) and row[ci]:
                     if not looks_like_date(row[ci]):
-                        errors.append(f"Line {i}: column {header[ci]} not ISO-like date: {row[ci]!r}")
+                        errors.append(f"Ligne {i} : colonne {header[ci]} n'est pas une date ISO-like : {row[ci]!r}")
 
             cleaned_rows.append(row)
 
-    # Write output if not dry-run
+    # Écrire la sortie si non --dry-run
     if not dry_run:
         target = out_path if out_path else file_path
         with target.open('w', encoding='utf-8', newline='') as outfile:
@@ -99,11 +100,11 @@ def main():
         res = clean_csv_file(file_path, out_path, args.dry_run)
         results.append(res)
 
-    # Report
+    # Rapport
     any_errors = False
     for r in results:
         if 'error' in r:
-            print(f"[WARN] {r['file']}: {r['error']}")
+            print(f"[AVERT] {r['file']}: {r['error']}")
         if r.get('errors'):
             any_errors = True
             print(f"[ERR] {r['file']} ->")
@@ -113,10 +114,10 @@ def main():
             print(f"[OK] {r['file']}")
 
     if any_errors:
-        print('\nSome files have validation issues. Fix them or run with --dry-run to inspect.')
+        print('\nCertains fichiers présentent des problèmes de validation. Corrigez-les ou lancez avec --dry-run pour inspecter.')
         sys.exit(2)
     else:
-        print('\nAll files cleaned successfully.')
+        print('\nTous les fichiers ont été nettoyés avec succès.')
 
 
 if __name__ == '__main__':
diff --git a/verify.log b/verify.log
index f859211..d2a220e 100644
--- a/verify.log
+++ b/verify.log
@@ -6,7 +6,7 @@
 
           executed_at          
 -------------------------------
- 2025-09-19 09:15:01.473058+00
+ 2025-09-22 08:30:37.786643+00
 (1 row)
 
 === 1) Comptes & répartition par rôle ==========================
@@ -126,49 +126,49 @@
 === 13) Performance : EXPLAIN sur requêtes clés ===============
                                                                      QUERY PLAN                                                                     
 ----------------------------------------------------------------------------------------------------------------------------------------------------
- Limit  (cost=11.31..11.31 rows=3 width=89) (actual time=0.035..0.036 rows=0 loops=1)
-   ->  Sort  (cost=11.31..11.31 rows=3 width=89) (actual time=0.032..0.033 rows=0 loops=1)
+ Limit  (cost=11.31..11.31 rows=3 width=89) (actual time=0.020..0.021 rows=0 loops=1)
+   ->  Sort  (cost=11.31..11.31 rows=3 width=89) (actual time=0.019..0.019 rows=0 loops=1)
          Sort Key: cree_le DESC
          Sort Method: quicksort  Memory: 25kB
-         ->  Bitmap Heap Scan on messages m  (cost=4.17..11.28 rows=3 width=89) (actual time=0.023..0.024 rows=0 loops=1)
+         ->  Bitmap Heap Scan on messages m  (cost=4.17..11.28 rows=3 width=89) (actual time=0.013..0.014 rows=0 loops=1)
                Recheck Cond: (id_dossier = 'dddddddd-dddd-dddd-dddd-dddddddddddd'::uuid)
-               ->  Bitmap Index Scan on idx_messages_id_dossier_cree_le  (cost=0.00..4.17 rows=3 width=0) (actual time=0.012..0.013 rows=0 loops=1)
+               ->  Bitmap Index Scan on idx_messages_id_dossier_cree_le  (cost=0.00..4.17 rows=3 width=0) (actual time=0.006..0.007 rows=0 loops=1)
                      Index Cond: (id_dossier = 'dddddddd-dddd-dddd-dddd-dddddddddddd'::uuid)
- Planning Time: 1.468 ms
- Execution Time: 0.521 ms
+ Planning Time: 0.106 ms
+ Execution Time: 0.105 ms
 (10 rows)
 
                                                                      QUERY PLAN                                                                      
 -----------------------------------------------------------------------------------------------------------------------------------------------------
- Index Scan using idx_evenements_id_enfant_date_debut on evenements ev  (cost=0.15..8.17 rows=1 width=161) (actual time=0.006..0.007 rows=0 loops=1)
+ Index Scan using idx_evenements_id_enfant_date_debut on evenements ev  (cost=0.15..8.17 rows=1 width=161) (actual time=0.021..0.021 rows=0 loops=1)
    Index Cond: ((id_enfant = 'aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa'::uuid) AND (date_debut >= '2025-01-01 00:00:00+00'::timestamp with time zone))
- Planning Time: 0.117 ms
- Execution Time: 0.025 ms
+ Planning Time: 0.064 ms
+ Execution Time: 0.032 ms
 (4 rows)
 
                                                                       QUERY PLAN                                                                      
 ------------------------------------------------------------------------------------------------------------------------------------------------------
- Limit  (cost=9.52..9.53 rows=2 width=73) (actual time=0.012..0.013 rows=0 loops=1)
-   ->  Sort  (cost=9.52..9.53 rows=2 width=73) (actual time=0.011..0.012 rows=0 loops=1)
+ Limit  (cost=9.52..9.53 rows=2 width=73) (actual time=0.018..0.019 rows=0 loops=1)
+   ->  Sort  (cost=9.52..9.53 rows=2 width=73) (actual time=0.017..0.018 rows=0 loops=1)
          Sort Key: cree_le DESC
          Sort Method: quicksort  Memory: 25kB
-         ->  Bitmap Heap Scan on notifications n  (cost=4.17..9.51 rows=2 width=73) (actual time=0.007..0.008 rows=0 loops=1)
+         ->  Bitmap Heap Scan on notifications n  (cost=4.17..9.51 rows=2 width=73) (actual time=0.014..0.014 rows=0 loops=1)
                Recheck Cond: ((id_utilisateur = '33333333-3333-3333-3333-333333333333'::uuid) AND (NOT lu))
-               ->  Bitmap Index Scan on idx_notifications_user_lu_cree_le  (cost=0.00..4.17 rows=2 width=0) (actual time=0.004..0.005 rows=0 loops=1)
+               ->  Bitmap Index Scan on idx_notifications_user_lu_cree_le  (cost=0.00..4.17 rows=2 width=0) (actual time=0.012..0.012 rows=0 loops=1)
                      Index Cond: ((id_utilisateur = '33333333-3333-3333-3333-333333333333'::uuid) AND (lu = false))
- Planning Time: 0.087 ms
- Execution Time: 0.027 ms
+ Planning Time: 0.059 ms
+ Execution Time: 0.141 ms
 (10 rows)
 
                                                                            QUERY PLAN                                                                            
 -----------------------------------------------------------------------------------------------------------------------------------------------------------------
- Sort  (cost=8.18..8.18 rows=1 width=267) (actual time=0.017..0.018 rows=0 loops=1)
+ Sort  (cost=8.18..8.18 rows=1 width=267) (actual time=0.043..0.043 rows=0 loops=1)
    Sort Key: cree_le DESC
    Sort Method: quicksort  Memory: 25kB
-   ->  Index Scan using idx_dossiers_id_parent_enfant_statut_cree_le on dossiers d  (cost=0.15..8.17 rows=1 width=267) (actual time=0.012..0.012 rows=0 loops=1)
+   ->  Index Scan using idx_dossiers_id_parent_enfant_statut_cree_le on dossiers d  (cost=0.15..8.17 rows=1 width=267) (actual time=0.038..0.038 rows=0 loops=1)
          Index Cond: ((id_parent = '33333333-3333-3333-3333-333333333333'::uuid) AND (id_enfant = 'aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa'::uuid))
- Planning Time: 0.062 ms
- Execution Time: 0.032 ms
+ Planning Time: 0.103 ms
+ Execution Time: 0.067 ms
 (7 rows)
 
 === 14) JSONB : exemples de filtrage ===========================