Script de nettoyage des CSV et sécurisation des variables d'environnement
This commit is contained in:
parent
eea1bd8c4b
commit
013083176e
@ -9,13 +9,14 @@ services:
|
|||||||
restart: unless-stopped
|
restart: unless-stopped
|
||||||
environment:
|
environment:
|
||||||
POSTGRES_USER: ${POSTGRES_USER:-admin}
|
POSTGRES_USER: ${POSTGRES_USER:-admin}
|
||||||
POSTGRES_PASSWORD: ${POSTGRES_PASSWORD:-admin123}
|
POSTGRES_PASSWORD: ${POSTGRES_PASSWORD}
|
||||||
POSTGRES_DB: ${POSTGRES_DB:-ptitpas_db}
|
POSTGRES_DB: ${POSTGRES_DB:-ptitpas_db}
|
||||||
ports:
|
ports:
|
||||||
- "5433:5432"
|
- "5433:5432"
|
||||||
volumes:
|
volumes:
|
||||||
# Scripts de migration (ordre important → sync_enums, init, indexes, checks, triggers, import…)
|
# Scripts de migration (init first). NOTE: `00_sync_enums.sql` is intentionally
|
||||||
- ./migrations/00_sync_enums.sql:/docker-entrypoint-initdb.d/00_sync_enums.sql
|
# NOT mounted here because it must be executed after `01_init.sql` (it alters
|
||||||
|
# existing enum types). The local pipeline applies it explicitly after init.
|
||||||
- ./migrations/01_init.sql:/docker-entrypoint-initdb.d/01_init.sql
|
- ./migrations/01_init.sql:/docker-entrypoint-initdb.d/01_init.sql
|
||||||
- ./migrations/02_indexes.sql:/docker-entrypoint-initdb.d/02_indexes.sql
|
- ./migrations/02_indexes.sql:/docker-entrypoint-initdb.d/02_indexes.sql
|
||||||
- ./migrations/03_checks.sql:/docker-entrypoint-initdb.d/03_checks.sql
|
- ./migrations/03_checks.sql:/docker-entrypoint-initdb.d/03_checks.sql
|
||||||
@ -39,7 +40,7 @@ services:
|
|||||||
restart: unless-stopped
|
restart: unless-stopped
|
||||||
environment:
|
environment:
|
||||||
PGADMIN_DEFAULT_EMAIL: ${PGADMIN_DEFAULT_EMAIL:-admin@ptits-pas.fr}
|
PGADMIN_DEFAULT_EMAIL: ${PGADMIN_DEFAULT_EMAIL:-admin@ptits-pas.fr}
|
||||||
PGADMIN_DEFAULT_PASSWORD: ${PGADMIN_DEFAULT_PASSWORD:-admin123}
|
PGADMIN_DEFAULT_PASSWORD: ${PGADMIN_DEFAULT_PASSWORD}
|
||||||
ports:
|
ports:
|
||||||
- "8081:80"
|
- "8081:80"
|
||||||
depends_on:
|
depends_on:
|
||||||
|
|||||||
@ -4,9 +4,9 @@ services:
|
|||||||
container_name: ynov-postgres
|
container_name: ynov-postgres
|
||||||
restart: unless-stopped
|
restart: unless-stopped
|
||||||
environment:
|
environment:
|
||||||
POSTGRES_USER: admin
|
POSTGRES_USER: ${POSTGRES_USER:-admin}
|
||||||
POSTGRES_PASSWORD: admin123
|
POSTGRES_PASSWORD: ${POSTGRES_PASSWORD}
|
||||||
POSTGRES_DB: ptitpas_db
|
POSTGRES_DB: ${POSTGRES_DB:-ptitpas_db}
|
||||||
volumes:
|
volumes:
|
||||||
- ./migrations/01_init.sql:/docker-entrypoint-initdb.d/01_init.sql
|
- ./migrations/01_init.sql:/docker-entrypoint-initdb.d/01_init.sql
|
||||||
- postgres_data:/var/lib/postgresql/data
|
- postgres_data:/var/lib/postgresql/data
|
||||||
@ -18,8 +18,8 @@ services:
|
|||||||
container_name: ynov-pgadmin
|
container_name: ynov-pgadmin
|
||||||
restart: unless-stopped
|
restart: unless-stopped
|
||||||
environment:
|
environment:
|
||||||
PGADMIN_DEFAULT_EMAIL: admin@ptits-pas.fr
|
PGADMIN_DEFAULT_EMAIL: ${PGADMIN_DEFAULT_EMAIL:-admin@ptits-pas.fr}
|
||||||
PGADMIN_DEFAULT_PASSWORD: admin123
|
PGADMIN_DEFAULT_PASSWORD: ${PGADMIN_DEFAULT_PASSWORD}
|
||||||
depends_on:
|
depends_on:
|
||||||
- db
|
- db
|
||||||
labels:
|
labels:
|
||||||
|
|||||||
3
makefile
3
makefile
@ -75,8 +75,6 @@ logs:
|
|||||||
psql:
|
psql:
|
||||||
docker exec -it $(PG_CONTAINER) psql -U $(PG_USER) -d $(PG_DB)
|
docker exec -it $(PG_CONTAINER) psql -U $(PG_USER) -d $(PG_DB)
|
||||||
|
|
||||||
stop:
|
|
||||||
docker compose -f docker-compose.dev.yml down
|
|
||||||
stop:
|
stop:
|
||||||
docker compose -f docker-compose.dev.yml down
|
docker compose -f docker-compose.dev.yml down
|
||||||
|
|
||||||
@ -84,3 +82,4 @@ sync-enums:
|
|||||||
@echo "🔁 Génération de migrations/00_sync_enums.sql depuis les CSV"
|
@echo "🔁 Génération de migrations/00_sync_enums.sql depuis les CSV"
|
||||||
@python3 scripts/sync_enums.py > migrations/00_sync_enums.sql
|
@python3 scripts/sync_enums.py > migrations/00_sync_enums.sql
|
||||||
@echo "✅ migrations/00_sync_enums.sql générée. Relis le fichier avant de l'appliquer."
|
@echo "✅ migrations/00_sync_enums.sql générée. Relis le fichier avant de l'appliquer."
|
||||||
|
|
||||||
|
|||||||
@ -2,10 +2,11 @@
|
|||||||
"""scripts/clean_csv.py
|
"""scripts/clean_csv.py
|
||||||
|
|
||||||
Usage:
|
Usage:
|
||||||
python3 scripts/clean_csv.py [input_dir] [--out-dir OUT] [--dry-run]
|
python3 scripts/clean_csv.py [input_dir] [--out-dir OUT] [--dry-run]
|
||||||
|
|
||||||
This script cleans CSV files (trim, remove 'NULL', fix column count) and
|
Ce script nettoie les fichiers CSV (trim, suppression de 'NULL', correction du
|
||||||
performs simple validations (UUID-looking columns, date columns).
|
nombre de colonnes) et réalise des validations simples (colonnes ressemblant
|
||||||
|
à des UUID, colonnes date).
|
||||||
"""
|
"""
|
||||||
import csv
|
import csv
|
||||||
import sys
|
import sys
|
||||||
@ -46,7 +47,7 @@ def clean_csv_file(file_path: Path, out_path: Path = None, dry_run: bool = False
|
|||||||
nb_cols = len(header)
|
nb_cols = len(header)
|
||||||
cleaned_rows.append([h.strip() for h in header])
|
cleaned_rows.append([h.strip() for h in header])
|
||||||
|
|
||||||
# Heuristics for validations
|
# Heuristiques pour les validations
|
||||||
uuid_cols = [i for i, h in enumerate(header) if h.lower() == 'id' or h.lower().endswith('_id')]
|
uuid_cols = [i for i, h in enumerate(header) if h.lower() == 'id' or h.lower().endswith('_id')]
|
||||||
date_cols = [i for i, h in enumerate(header) if any(k in h.lower() for k in ('date', '_le', '_at'))]
|
date_cols = [i for i, h in enumerate(header) if any(k in h.lower() for k in ('date', '_le', '_at'))]
|
||||||
|
|
||||||
@ -63,15 +64,15 @@ def clean_csv_file(file_path: Path, out_path: Path = None, dry_run: bool = False
|
|||||||
for ci in uuid_cols:
|
for ci in uuid_cols:
|
||||||
if ci < len(row) and row[ci]:
|
if ci < len(row) and row[ci]:
|
||||||
if not is_uuid(row[ci]):
|
if not is_uuid(row[ci]):
|
||||||
errors.append(f"Line {i}: column {header[ci]} not UUID-like: {row[ci]!r}")
|
errors.append(f"Ligne {i} : colonne {header[ci]} ne ressemble pas à un UUID : {row[ci]!r}")
|
||||||
for ci in date_cols:
|
for ci in date_cols:
|
||||||
if ci < len(row) and row[ci]:
|
if ci < len(row) and row[ci]:
|
||||||
if not looks_like_date(row[ci]):
|
if not looks_like_date(row[ci]):
|
||||||
errors.append(f"Line {i}: column {header[ci]} not ISO-like date: {row[ci]!r}")
|
errors.append(f"Ligne {i} : colonne {header[ci]} n'est pas une date ISO-like : {row[ci]!r}")
|
||||||
|
|
||||||
cleaned_rows.append(row)
|
cleaned_rows.append(row)
|
||||||
|
|
||||||
# Write output if not dry-run
|
# Écrire la sortie si non --dry-run
|
||||||
if not dry_run:
|
if not dry_run:
|
||||||
target = out_path if out_path else file_path
|
target = out_path if out_path else file_path
|
||||||
with target.open('w', encoding='utf-8', newline='') as outfile:
|
with target.open('w', encoding='utf-8', newline='') as outfile:
|
||||||
@ -99,11 +100,11 @@ def main():
|
|||||||
res = clean_csv_file(file_path, out_path, args.dry_run)
|
res = clean_csv_file(file_path, out_path, args.dry_run)
|
||||||
results.append(res)
|
results.append(res)
|
||||||
|
|
||||||
# Report
|
# Rapport
|
||||||
any_errors = False
|
any_errors = False
|
||||||
for r in results:
|
for r in results:
|
||||||
if 'error' in r:
|
if 'error' in r:
|
||||||
print(f"[WARN] {r['file']}: {r['error']}")
|
print(f"[AVERT] {r['file']}: {r['error']}")
|
||||||
if r.get('errors'):
|
if r.get('errors'):
|
||||||
any_errors = True
|
any_errors = True
|
||||||
print(f"[ERR] {r['file']} ->")
|
print(f"[ERR] {r['file']} ->")
|
||||||
@ -113,10 +114,10 @@ def main():
|
|||||||
print(f"[OK] {r['file']}")
|
print(f"[OK] {r['file']}")
|
||||||
|
|
||||||
if any_errors:
|
if any_errors:
|
||||||
print('\nSome files have validation issues. Fix them or run with --dry-run to inspect.')
|
print('\nCertains fichiers présentent des problèmes de validation. Corrigez-les ou lancez avec --dry-run pour inspecter.')
|
||||||
sys.exit(2)
|
sys.exit(2)
|
||||||
else:
|
else:
|
||||||
print('\nAll files cleaned successfully.')
|
print('\nTous les fichiers ont été nettoyés avec succès.')
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
|
|||||||
40
verify.log
40
verify.log
@ -6,7 +6,7 @@
|
|||||||
|
|
||||||
executed_at
|
executed_at
|
||||||
-------------------------------
|
-------------------------------
|
||||||
2025-09-19 09:15:01.473058+00
|
2025-09-22 08:30:37.786643+00
|
||||||
(1 row)
|
(1 row)
|
||||||
|
|
||||||
=== 1) Comptes & répartition par rôle ==========================
|
=== 1) Comptes & répartition par rôle ==========================
|
||||||
@ -126,49 +126,49 @@
|
|||||||
=== 13) Performance : EXPLAIN sur requêtes clés ===============
|
=== 13) Performance : EXPLAIN sur requêtes clés ===============
|
||||||
QUERY PLAN
|
QUERY PLAN
|
||||||
----------------------------------------------------------------------------------------------------------------------------------------------------
|
----------------------------------------------------------------------------------------------------------------------------------------------------
|
||||||
Limit (cost=11.31..11.31 rows=3 width=89) (actual time=0.035..0.036 rows=0 loops=1)
|
Limit (cost=11.31..11.31 rows=3 width=89) (actual time=0.020..0.021 rows=0 loops=1)
|
||||||
-> Sort (cost=11.31..11.31 rows=3 width=89) (actual time=0.032..0.033 rows=0 loops=1)
|
-> Sort (cost=11.31..11.31 rows=3 width=89) (actual time=0.019..0.019 rows=0 loops=1)
|
||||||
Sort Key: cree_le DESC
|
Sort Key: cree_le DESC
|
||||||
Sort Method: quicksort Memory: 25kB
|
Sort Method: quicksort Memory: 25kB
|
||||||
-> Bitmap Heap Scan on messages m (cost=4.17..11.28 rows=3 width=89) (actual time=0.023..0.024 rows=0 loops=1)
|
-> Bitmap Heap Scan on messages m (cost=4.17..11.28 rows=3 width=89) (actual time=0.013..0.014 rows=0 loops=1)
|
||||||
Recheck Cond: (id_dossier = 'dddddddd-dddd-dddd-dddd-dddddddddddd'::uuid)
|
Recheck Cond: (id_dossier = 'dddddddd-dddd-dddd-dddd-dddddddddddd'::uuid)
|
||||||
-> Bitmap Index Scan on idx_messages_id_dossier_cree_le (cost=0.00..4.17 rows=3 width=0) (actual time=0.012..0.013 rows=0 loops=1)
|
-> Bitmap Index Scan on idx_messages_id_dossier_cree_le (cost=0.00..4.17 rows=3 width=0) (actual time=0.006..0.007 rows=0 loops=1)
|
||||||
Index Cond: (id_dossier = 'dddddddd-dddd-dddd-dddd-dddddddddddd'::uuid)
|
Index Cond: (id_dossier = 'dddddddd-dddd-dddd-dddd-dddddddddddd'::uuid)
|
||||||
Planning Time: 1.468 ms
|
Planning Time: 0.106 ms
|
||||||
Execution Time: 0.521 ms
|
Execution Time: 0.105 ms
|
||||||
(10 rows)
|
(10 rows)
|
||||||
|
|
||||||
QUERY PLAN
|
QUERY PLAN
|
||||||
-----------------------------------------------------------------------------------------------------------------------------------------------------
|
-----------------------------------------------------------------------------------------------------------------------------------------------------
|
||||||
Index Scan using idx_evenements_id_enfant_date_debut on evenements ev (cost=0.15..8.17 rows=1 width=161) (actual time=0.006..0.007 rows=0 loops=1)
|
Index Scan using idx_evenements_id_enfant_date_debut on evenements ev (cost=0.15..8.17 rows=1 width=161) (actual time=0.021..0.021 rows=0 loops=1)
|
||||||
Index Cond: ((id_enfant = 'aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa'::uuid) AND (date_debut >= '2025-01-01 00:00:00+00'::timestamp with time zone))
|
Index Cond: ((id_enfant = 'aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa'::uuid) AND (date_debut >= '2025-01-01 00:00:00+00'::timestamp with time zone))
|
||||||
Planning Time: 0.117 ms
|
Planning Time: 0.064 ms
|
||||||
Execution Time: 0.025 ms
|
Execution Time: 0.032 ms
|
||||||
(4 rows)
|
(4 rows)
|
||||||
|
|
||||||
QUERY PLAN
|
QUERY PLAN
|
||||||
------------------------------------------------------------------------------------------------------------------------------------------------------
|
------------------------------------------------------------------------------------------------------------------------------------------------------
|
||||||
Limit (cost=9.52..9.53 rows=2 width=73) (actual time=0.012..0.013 rows=0 loops=1)
|
Limit (cost=9.52..9.53 rows=2 width=73) (actual time=0.018..0.019 rows=0 loops=1)
|
||||||
-> Sort (cost=9.52..9.53 rows=2 width=73) (actual time=0.011..0.012 rows=0 loops=1)
|
-> Sort (cost=9.52..9.53 rows=2 width=73) (actual time=0.017..0.018 rows=0 loops=1)
|
||||||
Sort Key: cree_le DESC
|
Sort Key: cree_le DESC
|
||||||
Sort Method: quicksort Memory: 25kB
|
Sort Method: quicksort Memory: 25kB
|
||||||
-> Bitmap Heap Scan on notifications n (cost=4.17..9.51 rows=2 width=73) (actual time=0.007..0.008 rows=0 loops=1)
|
-> Bitmap Heap Scan on notifications n (cost=4.17..9.51 rows=2 width=73) (actual time=0.014..0.014 rows=0 loops=1)
|
||||||
Recheck Cond: ((id_utilisateur = '33333333-3333-3333-3333-333333333333'::uuid) AND (NOT lu))
|
Recheck Cond: ((id_utilisateur = '33333333-3333-3333-3333-333333333333'::uuid) AND (NOT lu))
|
||||||
-> Bitmap Index Scan on idx_notifications_user_lu_cree_le (cost=0.00..4.17 rows=2 width=0) (actual time=0.004..0.005 rows=0 loops=1)
|
-> Bitmap Index Scan on idx_notifications_user_lu_cree_le (cost=0.00..4.17 rows=2 width=0) (actual time=0.012..0.012 rows=0 loops=1)
|
||||||
Index Cond: ((id_utilisateur = '33333333-3333-3333-3333-333333333333'::uuid) AND (lu = false))
|
Index Cond: ((id_utilisateur = '33333333-3333-3333-3333-333333333333'::uuid) AND (lu = false))
|
||||||
Planning Time: 0.087 ms
|
Planning Time: 0.059 ms
|
||||||
Execution Time: 0.027 ms
|
Execution Time: 0.141 ms
|
||||||
(10 rows)
|
(10 rows)
|
||||||
|
|
||||||
QUERY PLAN
|
QUERY PLAN
|
||||||
-----------------------------------------------------------------------------------------------------------------------------------------------------------------
|
-----------------------------------------------------------------------------------------------------------------------------------------------------------------
|
||||||
Sort (cost=8.18..8.18 rows=1 width=267) (actual time=0.017..0.018 rows=0 loops=1)
|
Sort (cost=8.18..8.18 rows=1 width=267) (actual time=0.043..0.043 rows=0 loops=1)
|
||||||
Sort Key: cree_le DESC
|
Sort Key: cree_le DESC
|
||||||
Sort Method: quicksort Memory: 25kB
|
Sort Method: quicksort Memory: 25kB
|
||||||
-> Index Scan using idx_dossiers_id_parent_enfant_statut_cree_le on dossiers d (cost=0.15..8.17 rows=1 width=267) (actual time=0.012..0.012 rows=0 loops=1)
|
-> Index Scan using idx_dossiers_id_parent_enfant_statut_cree_le on dossiers d (cost=0.15..8.17 rows=1 width=267) (actual time=0.038..0.038 rows=0 loops=1)
|
||||||
Index Cond: ((id_parent = '33333333-3333-3333-3333-333333333333'::uuid) AND (id_enfant = 'aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa'::uuid))
|
Index Cond: ((id_parent = '33333333-3333-3333-3333-333333333333'::uuid) AND (id_enfant = 'aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa'::uuid))
|
||||||
Planning Time: 0.062 ms
|
Planning Time: 0.103 ms
|
||||||
Execution Time: 0.032 ms
|
Execution Time: 0.067 ms
|
||||||
(7 rows)
|
(7 rows)
|
||||||
|
|
||||||
=== 14) JSONB : exemples de filtrage ===========================
|
=== 14) JSONB : exemples de filtrage ===========================
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user