Bladeren bron

Version 2 chemin : LLM Vision and classic -> les modifs à faire : regroupement par colonne

Abdenbi 3 dagen geleden
bovenliggende
commit
e96f053e19
43 gewijzigde bestanden met toevoegingen van 1615 en 469 verwijderingen
  1. 1 1
      .gitignore
  2. 1 1
      03 - Scripts/.env
  3. 192 20
      03 - Scripts/Agents.py
  4. 704 0
      03 - Scripts/Consolidation.ipynb
  5. 31 29
      03 - Scripts/clean_DBSCAN.py
  6. 10 9
      03 - Scripts/function_exctract.py
  7. 3 2
      03 - Scripts/main.py
  8. 355 0
      03 - Scripts/mapping.json
  9. 15 20
      03 - Scripts/server.py
  10. 257 376
      03 - Scripts/sfcr-app/src/App.jsx
  11. 5 2
      03 - Scripts/sfcr-app/src/constants.js
  12. 41 9
      03 - Scripts/workflow_agents.py
  13. BIN
      04 - Outputs/2024/CNP_Assurances/Rapport_S.05_page_145.xlsx
  14. BIN
      04 - Outputs/2024/CNP_Assurances/Rapport_S.05_page_146.xlsx
  15. BIN
      04 - Outputs/2025/ACM_Vie_SA/Rapport_S.02_page_56.xlsx
  16. BIN
      04 - Outputs/2025/ACM_Vie_SA/Rapport_S.12_page_59.xlsx
  17. BIN
      04 - Outputs/2025/ACM_Vie_SA/Rapport_S.22_page_60.xlsx
  18. BIN
      04 - Outputs/2025/CNP_Assurances/Rapport_S.02_page_83.xlsx
  19. BIN
      04 - Outputs/2025/CNP_Assurances/Rapport_S.05_page_86.xlsx
  20. BIN
      04 - Outputs/2025/CNP_Assurances/Rapport_S.05_page_87.xlsx
  21. BIN
      04 - Outputs/2025/CNP_Assurances/Rapport_S.12_page_88.xlsx
  22. BIN
      04 - Outputs/2025/CNP_Assurances/Rapport_S.12_page_89.xlsx
  23. BIN
      04 - Outputs/2025/CNP_Assurances/Rapport_S.12_page_90.xlsx
  24. BIN
      04 - Outputs/2025/Caisse_Générale_de_Prévoyance_(CGP)/Rapport_S.02_page_48.xlsx
  25. BIN
      04 - Outputs/2025/Caisse_Générale_de_Prévoyance_(CGP)/Rapport_S.02_page_49.xlsx
  26. BIN
      04 - Outputs/2025/Caisse_Générale_de_Prévoyance_(CGP)/Rapport_S.05_page_50.xlsx
  27. BIN
      04 - Outputs/2025/Caisse_Générale_de_Prévoyance_(CGP)/Rapport_S.05_page_51.xlsx
  28. BIN
      04 - Outputs/2025/Caisse_Générale_de_Prévoyance_(CGP)/Rapport_S.12_page_52.xlsx
  29. BIN
      04 - Outputs/2025/Caisse_Générale_de_Prévoyance_(CGP)/Rapport_S.22_page_55.xlsx
  30. BIN
      04 - Outputs/2025/Caisse_Générale_de_Prévoyance_(CGP)/Rapport_S.25_page_58.xlsx
  31. BIN
      04 - Outputs/2025/Caisse_Générale_de_Prévoyance_(CGP)/Rapport_S.28_page_59.xlsx
  32. BIN
      04 - Outputs/2025/Cardif_Assurance_Vie/Rapport_S.02_page_2.xlsx
  33. BIN
      04 - Outputs/2025/Cardif_Assurance_Vie/Rapport_S.02_page_3.xlsx
  34. BIN
      04 - Outputs/2025/Cardif_Assurance_Vie/Rapport_S.12_page_7.xlsx
  35. BIN
      04 - Outputs/2025/Cardif_Assurance_Vie/Rapport_S.22_page_11.xlsx
  36. BIN
      04 - Outputs/2025/Cardif_Assurance_Vie/Rapport_S.25_page_13.xlsx
  37. BIN
      04 - Outputs/2025/Cardif_Assurance_Vie/Rapport_S.28_page_14.xlsx
  38. BIN
      04 - Outputs/2025/PREDICA/Rapport_S.17_page_62.xlsx
  39. BIN
      04 - Outputs/2025/PREDICA/Rapport_S.17_page_63.xlsx
  40. BIN
      04 - Outputs/2025/PREDICA/Rapport_S.17_page_64.xlsx
  41. BIN
      04 - Outputs/2025/PREDICA/Rapport_S.17_page_65.xlsx
  42. BIN
      04 - Outputs/2025/PREDICA/Rapport_S.19_page_66.xlsx
  43. BIN
      04 - Outputs/2025/PREDICA/Rapport_S.22_page_67.xlsx

+ 1 - 1
.gitignore

@@ -4,7 +4,7 @@ graph_workflow.png
 # Dossiers de données / projets
 01 - Sources/
 02 - Inputs/
-03 - Outputs/
+04 - Outputs/
 
 # VSCode
 .vscode/

+ 1 - 1
03 - Scripts/.env

@@ -7,7 +7,7 @@ LANGFUSE_BASE_URL="https://cloud.langfuse.com"
 SERPER_API_KEY="5d34667463124d3a83b2b04987a3d80bb1dd3d00"
 GOOGLE_API_KEY="AIzaSyAdLpi0Z6Vh9wxjXY3qrdTKOJ7OvBRC4ZQ"
 GROQ_API_KEY ="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"
-OPENAI_API_KEY="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"
+OPENAI_API_KEY="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"
 MISTRAL_API_KEY="LrPwFxTOrrhORneWamW4VNP4UA0k7jPN"
 
 OPENROUTER_API_KEY="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"

+ 192 - 20
03 - Scripts/Agents.py

@@ -1,6 +1,6 @@
 import os 
 from typing import Annotated , Sequence , TypedDict , Optional , List , Any
-from langchain_core.messages import BaseMessage, HumanMessage, SystemMessage
+from langchain_core.messages import BaseMessage, HumanMessage, SystemMessage , AIMessage
 from langgraph.graph.message import add_messages
 from dotenv import load_dotenv  
 from langchain_openai import ChatOpenAI 
@@ -9,6 +9,10 @@ import pandas as pd
 from langfuse import get_client
 import json
 from langchain_google_genai import ChatGoogleGenerativeAI 
+import re
+import base64
+import cv2
+from pathlib import Path
 
 from tools import  excel_code_interpreter
 
@@ -48,13 +52,15 @@ class AgentState(TypedDict) :
     section_name : str
     lignes : str
     markdown : str
+    use_vision : bool
+    image_path: str
 
 
 model_gemini = ChatGoogleGenerativeAI(model="gemini-2.5-flash")
 
 model_llama = ChatGroq(model="llama-3.3-70b-versatile") 
 
-model_openai = ChatOpenAI(model="gpt-4o-mini" , temperature=0.2) 
+model_openai = ChatOpenAI(model="gpt-4o" , temperature=0.2) 
 
 model_ai = model_llama.bind_tools(tools)
 
@@ -62,58 +68,84 @@ ocr = PaddleOCR(use_angle_cls=True, lang='fr', det_limit_side_len=10000, show_lo
 
 
 
+def has_rc_codes(points):
+    has_r = False
+    has_c = False
+
+    for p in points:
+        text = str(p[2]).strip()
+
+        if text.startswith("R"):
+            has_r = True
+
+        if text.startswith("C"):
+            has_c = True
+
+    return has_r and has_c
+
+
 def agent_ocr(state: AgentState):
     pdf_path = state.get("pdf_path")
-    # On récupère la page unique envoyée par le main
     page_val = state.get("page")
 
     if page_val is None:
         raise ValueError("page est None dans state")
 
-    # Conversion en entier au cas où
     try:
         page_index = int(page_val) - 1 
     except TypeError:
-        # Si c'est une liste [45], on prend le premier élément
         page_index = int(page_val[0]) - 1
 
     all_points = []
     texte_accumule = []
 
     try:
-        # Traitement de la page unique
         img_finale = preparer_image_zoom_hd(pdf_path, page_index)
         raw_data = extraire_donnees_ocr(img_finale, ocr)
         data_propre = nettoyage_sortie_ocr(raw_data)
 
-        # Structure spatiale
         points = to_points(data_propre)
         all_points.extend(points)
 
-        # Texte pour le LLM
+        # 🎯 CAS ROUGE : Pas de codes R/C détectés -> Mode Vision
+        if not has_rc_codes(points):
+                    print(f"  Pas de codes R/C détectés — activation du mode LLM vision")
+
+                    # 🛠️ SAUVEGARDE ET COMPRESSION TEMPORAIRE
+                    # On utilise le format .jpg avec une qualité de 85% pour réduire drastiquement la taille
+                    chemin_image_temp = f"temp_page_{page_index + 1}.jpg"
+                    import cv2
+                    cv2.imwrite(chemin_image_temp, img_finale, [int(cv2.IMWRITE_JPEG_QUALITY), 85])
+
+                    return {
+                        "use_vision": True,
+                        "image_path": chemin_image_temp,  # 🟢 On ne passe qu'un bête texte de 20 caractères !
+                        "messages": [HumanMessage(content="Mode LLM Vision activé.")],
+                    }
+
+        # Texte pour le LLM (Cas normal)
         lignes_page = formater_donnees_section(data_propre, page_index)
         texte_accumule.extend(lignes_page) 
-
         print(f" Page {page_index + 1} traitée par l'OCR.")
 
     except Exception as e:
         print(f"Erreur lors de l'OCR Page {page_index + 1}: {e}")
         raise e
 
-    # DBSCAN et Markdown
+    # Traitement classique si des codes R/C sont présents
     clusters = cluster_lines(all_points, eps=0.5)
     lignes = build_lines(clusters)
     lignes = merge_close_lines(lignes)
     markdown = transform_to_clean_markdown(lignes)
-    
     contenu_final = "\n".join(texte_accumule)
 
     return {
-        "points" : points , 
+        "points": points, 
         "messages": [HumanMessage(content=f"Voici les données OCR brutes :\n{contenu_final}")],
-        "clusters" : clusters , 
-        "lignes" : lignes ,
-        "markdown": markdown 
+        "clusters": clusters, 
+        "lignes": lignes,
+        "markdown": markdown,
+        "use_vision": False # <-- Cas normal, on continue vers agent_extracteur
     }
 
 
@@ -144,23 +176,163 @@ def agent_extracteur(state: AgentState):
     """
 
     markdown_content = state.get("markdown", "")
-    if not markdown_content or str(markdown_content).strip() == "":
-        backup_content = json.dumps(state.get("lignes", "Aucune donnée trouvée"))
-        input_content = f"Note : Le markdown était vide. Voici les lignes brutes :\n{backup_content}"
+    lignes = state.get("lignes", [])
+
+    # Garde-fou : markdown vide ou trop pauvre (moins de 2 lignes de données)
+    data_rows = [l for l in str(markdown_content).splitlines() if l.strip().startswith("|") and "R0" in l]
+
+    if not markdown_content or not str(markdown_content).strip() or len(data_rows) == 0:
+        backup_content = json.dumps(lignes, ensure_ascii=False)
+        input_content = f"Note : Le markdown était vide ou invalide. Voici les lignes brutes OCR :\n{backup_content}"
     else:
         input_content = markdown_content
 
     msg = [
         SystemMessage(content=prompt),
-        HumanMessage(content=json.dumps(input_content))
+        HumanMessage(content=input_content)  # ← plus de json.dumps() sur du markdown déjà str
     ]
 
-    response = model_llama.invoke(msg)
+    response = model_openai.invoke(msg)
     return {"messages": [response]}
 
 
 
+def encoder_image_en_base64(chemin_image: str) -> str:
+    """Convertit une image locale en chaîne base64 pour l'API OpenAI."""
+    with open(chemin_image, "rb") as image_file:
+        return base64.b64encode(image_file.read()).decode("utf-8")
+
+def agent_llm_vision(state: AgentState):
+    print("[LLM Vision] Début de l'analyse visuelle du tableau QRT...")
+
+    # 1. On récupère le CHEMIN du fichier image
+    image_path = state.get("image_path")
+    section_name = state.get("section_name", "Non spécifiée")
+
+    if not image_path or not os.path.exists(image_path):
+        raise ValueError(f"Le fichier image est introuvable : {image_path}")
+    
+
+    section_name_raw = state.get("section_name")  # Ex: "S.23_page_64" ou "S.02.01_table_1"
+
+    if not section_name_raw:
+        raise ValueError("L'état de l'agent doit contenir un 'section_name' valide.")
+
+    # Extraction de la racine de la section (ex: "S.23" ou "S.02.01")
+    # Cette regex capture tout ce qui commence par S. suivi de chiffres et de points
+    match = re.match(r"^(S\.\d+(?:\.\d+)*)", section_name_raw)
+    if not match:
+        raise ValueError(f"Impossible de déterminer la racine réglementaire depuis : {section_name_raw}")
+    
+
+    section_racine = match.group(1)
+    mapping_path = Path(__file__).resolve().parent / "mapping.json"
+
+    """    print("Mapping path réel =", mapping_path)
+    print("Existe ?", mapping_path.exists())"""
+    
+    try:
+        with open(mapping_path, "r", encoding="utf-8") as f:
+            full_mapping = json.load(f)
+        
+        # On cherche d'abord la racine exacte, sinon on tente une correspondance partielle
+        section_mapping = full_mapping.get(section_racine)
+        
+        if not section_mapping:
+            # Fallback au cas où le JSON contient "S.23.01" mais votre racine est "S.23"
+            alternative_key = next((k for k in full_mapping.keys() if k.startswith(section_racine)), None)
+            if alternative_key:
+                section_mapping = full_mapping[alternative_key]
+            else:
+                raise KeyError(f"Aucun mapping trouvé pour '{section_racine}' (déduit de '{section_name_raw}') dans mapping.json.")
+            
+        mapping_json_reduit = json.dumps(section_mapping, ensure_ascii=False, indent=2)
+        
+    except Exception as e:
+        raise RuntimeError(f"Erreur mapping pour {section_name_raw} : {str(e)}")
+    # 2. Prompt (Identique)
+    PROMPT_VISION_SANS_CODES = f"""
+    Tu es un expert Solvabilité II. Ce tableau est un QRT SFCR sans codes R/C visibles.
+
+    Ta tâche :
+    1. Identifie les lignes (descriptions) et colonnes (headers) du tableau.
+    2. Associe chaque description de ligne au bon code Rxxxx selon la nomenclature Solvabilité II.
+    3. Associe chaque header de colonne au bon code Cxxxx.
+
+    Utilise le mapping suivant :
+
+    {mapping_json_reduit}
+
+    Règles STRICTES :
+        - Utilise directement les codes Rxxxx (lignes) et Cxxxx (colonnes) comme clés.
+        - Supprime uniquement les espaces dans les nombres (ex: "3 297 388" → 3297388).
+        - OPTIMISATION DE TOKENS : Exclura COMPLÈTEMENT du JSON de sortie les lignes/colonnes dont la valeur est égale à 0, vide, "-", "–" ou "N/A". Ne les écris pas.
+        - Structure attendue :
+
+    {{
+    "Cxxxx": {{
+        "Rxxxx": valeur,
+        ...
+    }},
+    ...
+    }}
+
+    - Section détectée : {section_name}
 
+    ⚠️ CONTRAINTE DE SORTIE :
+    - Réponds UNIQUEMENT avec l'objet JSON.
+    - PAS de markdown.
+    - PAS de balises ```json.
+    - PAS de texte explicatif.
+    - PAS de commentaires.
+    - Le JSON doit être valide et directement parsable par json.loads().
+    """
+
+    try:
+        # 3. Encodage à la volée du fichier disque en Base64
+        with open(image_path, "rb") as image_file:
+            base64_image = base64.b64encode(image_file.read()).decode("utf-8")
+
+        # 4. Préparation du message multimodal
+        msg_vision = HumanMessage(
+            content=[
+                {"type": "text", "text": PROMPT_VISION_SANS_CODES},
+                {
+                    "type": "image_url",
+                    "image_url": {"url": f"data:image/jpeg;base64,{base64_image}"},
+                },
+            ]
+        )
+
+        # 5. Appel OpenAI avec format JSON
+        model_json = model_openai.bind(response_format={"type": "json_object"})
+        response = model_json.invoke([msg_vision])
+
+        # Nettoyage des balises si présentes
+        contenu_propre = response.content.strip()
+        if contenu_propre.startswith("```json"):
+            contenu_propre = contenu_propre.replace("```json", "").replace("```", "").strip()
+
+        print(" [LLM Vision] Extraction réussie.")
+
+        # 6. NETTOYAGE DU DISQUE (Optionnel mais propre)
+        # Supprime le fichier temporaire pour ne pas encombrer votre dossier de travail
+        if os.path.exists(image_path):
+            os.remove(image_path)
+
+        # 7. Retour de la réponse (UNIQUEMENT le texte JSON)
+        # L'image géante n'est PAS stockée dans l'historique du graphe, elle disparaît ici !
+        return {
+            "messages": [AIMessage(content=contenu_propre)],
+            "image_path": None  # On réinitialise la clé à None pour vider le State
+        }
+
+    except Exception as e:
+        # En cas d'erreur, on essaie quand même de nettoyer le fichier
+        if os.path.exists(image_path):
+            os.remove(image_path)
+        print(f" [LLM Vision] Erreur : {e}")
+        raise e
 
 def agent_builder(state: AgentState):
     print(f" Construction du fichier Excel pour : {state['entreprise_name']}...")

+ 704 - 0
03 - Scripts/Consolidation.ipynb

@@ -0,0 +1,704 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 25,
+   "id": "381dd662",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import pandas as pd \n",
+    "import os \n",
+    "import logging \n",
+    "from pathlib import Path\n",
+    "from datetime import datetime\n",
+    "import re"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 26,
+   "id": "13b3f9ab",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "c:\\Users\\aiab\\Downloads\\Travaux\\SFCR_extraction_tool\\02 - Inputs\\_QRTs_paramétrages_Abd.xlsx\n"
+     ]
+    }
+   ],
+   "source": [
+    "SCRIPT_DIR   = Path.cwd()                           # 03 - Scripts/\n",
+    "PROJECT_ROOT = SCRIPT_DIR.parent                    # racine projet\n",
+    "OUTPUTS_DIR  = PROJECT_ROOT / \"04 - Outputs\"\n",
+    "BASE_FINALE  = OUTPUTS_DIR / \"base_consolidee_QRT.xlsx\"\n",
+    "LOG_FILE     = OUTPUTS_DIR / \"consolidation.log\"\n",
+    "\n",
+    "PARAM_DIR = PROJECT_ROOT / \"02 - Inputs\" / \"_QRTs_paramétrages_Abd.xlsx\"\n",
+    "\n",
+    "print(PARAM_DIR)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 27,
+   "id": "611d745d",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Pattern attendu pour les noms de fichiers QRT\n",
+    "PATTERN_FICHIER = re.compile(\n",
+    "    r\"Rapport_S\\.(\\d{2})_page_\\d+\\.xlsx\",\n",
+    "    re.IGNORECASE\n",
+    ")\n",
+    " \n",
+    "# Mise en place du logger (console + fichier)\n",
+    "logging.basicConfig(\n",
+    "    level=logging.INFO,\n",
+    "    format=\"%(asctime)s | %(levelname)-8s | %(message)s\",\n",
+    "    datefmt=\"%H:%M:%S\",\n",
+    "    handlers=[\n",
+    "        logging.StreamHandler(),\n",
+    "        logging.FileHandler(LOG_FILE, mode=\"w\", encoding=\"utf-8\"),\n",
+    "    ],\n",
+    ")\n",
+    "log = logging.getLogger(__name__)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 28,
+   "id": "40e02977",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# =============================================================================\n",
+    "# ÉTAPE 1 — Scan récursif : trouver tous les fichiers .xlsx\n",
+    "# =============================================================================\n",
+    "\n",
+    "# Lecture unique du fichier de paramétrage (hors boucle pour la performance)\n",
+    "df_param = pd.read_excel(PARAM_DIR, sheet_name=\"Liste SFCR\", header=3)\n",
+    "df_param.columns = df_param.columns.str.strip()\n",
+    "\n",
+    "# Nettoyage de l'année : supprimer les espaces, convertir en str\n",
+    "df_param[\"Année\"] = (\n",
+    "    df_param[\"Année\"]\n",
+    "    .astype(str)\n",
+    "    .str.replace(\" \", \"\", regex=False)\n",
+    "    .str.strip()\n",
+    ")\n",
+    "\n",
+    "# Nettoyage de l'entité : minuscules + strip (pour comparaison insensible à la casse)\n",
+    "df_param[\"entite_clean\"] = (\n",
+    "    df_param[\"Entité\"]\n",
+    "    .astype(str)\n",
+    "    .str.strip()\n",
+    "    .str.lower()\n",
+    ")\n",
+    "\n",
+    "# Nettoyage de l'unité : strip des espaces\n",
+    "df_param[\"Unité\"] = df_param[\"Unité\"].astype(str).str.strip()\n",
+    "\n",
+    "\n",
+    "def scanner_fichiers(outputs_dir: Path) -> list[dict]:\n",
+    "    \"\"\"\n",
+    "    Parcourt 04 - Outputs/{annee}/{entite}/*.xlsx\n",
+    "    Retourne une liste de dicts avec les métadonnées de chaque fichier.\n",
+    "    \"\"\"\n",
+    "    fichiers_trouves = []\n",
+    "\n",
+    "    if not outputs_dir.exists():\n",
+    "        log.error(f\"Dossier introuvable : {outputs_dir}\")\n",
+    "        return fichiers_trouves\n",
+    "\n",
+    "    # Niveau 1 : années (ex: 2023, 2024, 2025)\n",
+    "    for dossier_annee in sorted(outputs_dir.iterdir()):\n",
+    "        if not dossier_annee.is_dir():\n",
+    "            continue\n",
+    "        annee = dossier_annee.name.strip()\n",
+    "\n",
+    "        # On garde uniquement les dossiers dont le nom est une année (4 chiffres)\n",
+    "        if not re.fullmatch(r\"\\d{4}\", annee):\n",
+    "            log.debug(f\"Dossier ignoré (pas une année) : {dossier_annee.name}\")\n",
+    "            continue\n",
+    "\n",
+    "        # Niveau 2 : entités (ex: ACM_Vie_SA, BNP_Cardif)\n",
+    "        for dossier_entite in sorted(dossier_annee.iterdir()):\n",
+    "            if not dossier_entite.is_dir():\n",
+    "                continue\n",
+    "            entite = dossier_entite.name\n",
+    "\n",
+    "            # ── Nettoyage pour le match ──────────────────────────────────────\n",
+    "            # Dossier : ACM_Vie_SA  → remplace _ par espace → minuscules\n",
+    "            entite_clean = entite.replace(\"_\", \" \").strip().lower()\n",
+    "\n",
+    "            # Recherche dans le fichier de paramétrage\n",
+    "            df_filtre = df_param[\n",
+    "                (df_param[\"entite_clean\"] == entite_clean) &\n",
+    "                (df_param[\"Année\"] == annee)\n",
+    "            ]\n",
+    "\n",
+    "            if df_filtre.empty:\n",
+    "                log.warning(\n",
+    "                    f\"  ❌ NO MATCH paramétrage → entite='{entite_clean}' | annee='{annee}'\"\n",
+    "                )\n",
+    "                unite = \"N/A\"   # valeur par défaut si pas trouvé\n",
+    "            else:\n",
+    "                # Extraction de la valeur scalaire (première ligne si doublons)\n",
+    "                unite = str(df_filtre[\"Unité\"].iloc[0]).strip()\n",
+    "\n",
+    "            # Niveau 3 : fichiers Excel\n",
+    "            xlsx_trouves = list(dossier_entite.glob(\"*.xlsx\"))\n",
+    "            if not xlsx_trouves:\n",
+    "                log.warning(f\"Aucun .xlsx dans : {dossier_entite.relative_to(outputs_dir)}\")\n",
+    "                continue\n",
+    "\n",
+    "            for fichier in sorted(xlsx_trouves):\n",
+    "                # On ignore le fichier de sortie s'il est dans le même dossier\n",
+    "                if fichier.name == BASE_FINALE.name:\n",
+    "                    continue\n",
+    "\n",
+    "                fichiers_trouves.append({\n",
+    "                    \"annee\":   annee,\n",
+    "                    \"entite\":  entite,\n",
+    "                    \"chemin\":  fichier,\n",
+    "                    \"fichier\": fichier.name,\n",
+    "                    \"unite\":   unite,\n",
+    "                })\n",
+    "\n",
+    "    log.info(f\"Étape 1 — {len(fichiers_trouves)} fichier(s) .xlsx trouvé(s)\")\n",
+    "    return fichiers_trouves\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 29,
+   "id": "6d39ed8a",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# =============================================================================\n",
+    "# ÉTAPE 2 — Parsing du nom de fichier → extraction de la section\n",
+    "# =============================================================================\n",
+    " \n",
+    "def extraire_section(nom_fichier: str) -> str | None:\n",
+    "    \"\"\"\n",
+    "    Extrait la section depuis le nom du fichier.\n",
+    "    Ex: 'Rapport_S.02_page_56.xlsx' → 'S02'\n",
+    "    Retourne None si le nom ne correspond pas au pattern.\n",
+    "    \"\"\"\n",
+    "    match = PATTERN_FICHIER.match(nom_fichier)\n",
+    "    if match:\n",
+    "        return f\"S{match.group(1)}\"   # ex: '02' → 'S02'\n",
+    "    return None"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 30,
+   "id": "a23c3131",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# =============================================================================\n",
+    "# ÉTAPE 3 — Lecture et normalisation d'un fichier Excel\n",
+    "# =============================================================================\n",
+    " \n",
+    "def lire_fichier_excel(chemin: Path) -> pd.DataFrame | None:\n",
+    "    \"\"\"\n",
+    "    Lit un fichier QRT et retourne un DataFrame en format long :\n",
+    "        R | C | valeur\n",
+    "    La première ligne = en-têtes de colonnes C (ex: C0020, C0030…)\n",
+    "    La première colonne = codes de lignes R (ex: R0010, R0020…)\n",
+    "    \"\"\"\n",
+    "    try:\n",
+    "        # Lecture brute — header=0 : première ligne = noms de colonnes\n",
+    "        df_brut = pd.read_excel(chemin, header=0, index_col=0, dtype=str)\n",
+    " \n",
+    "        # Nettoyage des noms de lignes et colonnes\n",
+    "        df_brut.index.name   = \"R\"\n",
+    "        df_brut.columns.name = \"C\"\n",
+    " \n",
+    "        # Suppression des lignes/colonnes entièrement vides\n",
+    "        df_brut.dropna(how=\"all\", inplace=True)\n",
+    "        df_brut.dropna(axis=1, how=\"all\", inplace=True)\n",
+    " \n",
+    "        # Filtrage : on garde uniquement les lignes dont l'index ressemble à un code R\n",
+    "        masque_r = df_brut.index.str.match(r\"^R\\d{4}$\", na=False)\n",
+    "        df_brut  = df_brut[masque_r]\n",
+    " \n",
+    "        # Filtrage : on garde uniquement les colonnes dont le nom ressemble à un code C\n",
+    "        cols_c = [c for c in df_brut.columns if re.match(r\"^C\\d{4}$\", str(c))]\n",
+    "        df_brut = df_brut[cols_c]\n",
+    " \n",
+    "        if df_brut.empty:\n",
+    "            log.warning(f\"  Fichier vide après filtrage R/C : {chemin.name}\")\n",
+    "            return None\n",
+    " \n",
+    "        # Pivot en format long : une ligne par (R, C)\n",
+    "        df_long = (\n",
+    "            df_brut\n",
+    "            .reset_index()\n",
+    "            .melt(id_vars=\"R\", var_name=\"C\", value_name=\"valeur\")\n",
+    "        )\n",
+    "\n",
+    "        df_long[\"valeur\"] = (\n",
+    "        df_long[\"valeur\"]\n",
+    "        .astype(str)\n",
+    "        .str.replace(\"\\u00a0\", \"\", regex=False)  # espace insécable\n",
+    "        .str.replace(\" \", \"\", regex=False)        # espaces normaux\n",
+    "        )\n",
+    "\n",
+    "        \n",
+    " \n",
+    "        # Conversion numérique de la colonne valeur\n",
+    "        df_long[\"valeur\"] = pd.to_numeric(df_long[\"valeur\"], errors=\"coerce\")\n",
+    "\n",
+    "\n",
+    "        # Ne pas garder les 0\n",
+    "        df_long = df_long[df_long[\"valeur\"] != 0]\n",
+    " \n",
+    "        # Suppression des lignes où la valeur est NaN\n",
+    "        df_long.dropna(subset=[\"valeur\"], inplace=True)\n",
+    " \n",
+    "        return df_long\n",
+    " \n",
+    "    except Exception as e:\n",
+    "        log.error(f\"  Erreur lecture {chemin.name} : {e}\")\n",
+    "        return None"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 31,
+   "id": "b659cc7b",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# =============================================================================\n",
+    "# ÉTAPE 4 — Enrichissement et consolidation de tous les fichiers\n",
+    "# =============================================================================\n",
+    "\n",
+    "def consolider(fichiers: list[dict]) -> pd.DataFrame:\n",
+    "    \"\"\"\n",
+    "    Pour chaque fichier :\n",
+    "      1. Vérifie le nom (pattern QRT)\n",
+    "      2. Lit le contenu en format long\n",
+    "      3. Injecte les colonnes entite, annee, section, unite\n",
+    "      4. Calcule valeur_final (× 1000 si kEuros)\n",
+    "      5. Empile tout dans un grand DataFrame\n",
+    "    \"\"\"\n",
+    "    blocs      = []\n",
+    "    nb_ok      = 0\n",
+    "    nb_ignores = 0\n",
+    "    nb_erreurs = 0\n",
+    "\n",
+    "    for meta in fichiers:\n",
+    "        annee   = meta[\"annee\"]\n",
+    "        entite  = meta[\"entite\"]\n",
+    "        chemin  = meta[\"chemin\"]\n",
+    "        fichier = meta[\"fichier\"]\n",
+    "        unite   = meta[\"unite\"]\n",
+    "\n",
+    "        # --- Étape 2 intégrée : vérification du pattern ---\n",
+    "        section = extraire_section(fichier)\n",
+    "        if section is None:\n",
+    "            log.warning(f\"  Ignoré (nom non reconnu) : {fichier}\")\n",
+    "            nb_ignores += 1\n",
+    "            continue\n",
+    "\n",
+    "        log.info(f\"  Lecture : {annee}/{entite}/{fichier} → section {section}\")\n",
+    "\n",
+    "        # --- Étape 3 : lecture ---\n",
+    "        df = lire_fichier_excel(chemin)\n",
+    "        if df is None:\n",
+    "            nb_erreurs += 1\n",
+    "            continue\n",
+    "\n",
+    "        # --- Injection des métadonnées ---\n",
+    "        df.insert(0, \"entite\",  entite)\n",
+    "        df.insert(1, \"annee\",   int(annee))\n",
+    "        df.insert(2, \"section\", section)\n",
+    "        df.insert(3, \"unite\",   unite)\n",
+    "\n",
+    "        # --- Calcul valeur_final ---\n",
+    "        # Si l'unité est kEuros (insensible à la casse), on multiplie par 1000\n",
+    "        unite_lower = str(unite).lower().replace(\" \", \"\")\n",
+    "        if unite_lower in (\"keuro\", \"keuros\", \"k€\", \"milliersd'euros\", \"keur\"):\n",
+    "            df[\"valeur_final\"] = df[\"valeur\"] * 1000\n",
+    "        elif unite_lower in (\"meuro\", \"meuros\", \"m€\", \"meur\"):\n",
+    "            df[\"valeur_final\"] = df[\"valeur\"] * 1000000\n",
+    "        else:\n",
+    "            df[\"valeur_final\"] = df[\"valeur\"]\n",
+    "        blocs.append(df)\n",
+    "        nb_ok += 1\n",
+    "\n",
+    "    log.info(f\"Étape 4 — {nb_ok} fichier(s) consolidé(s) | \"\n",
+    "             f\"{nb_ignores} ignoré(s) | {nb_erreurs} erreur(s)\")\n",
+    "\n",
+    "    if not blocs:\n",
+    "        log.error(\"Aucun bloc à consolider. Vérifiez les fichiers sources.\")\n",
+    "        return pd.DataFrame(columns=[\"entite\", \"annee\", \"section\", \"R\", \"C\",\n",
+    "                                      \"valeur\", \"unite\", \"valeur_final\"])\n",
+    "\n",
+    "    # Empilement de tous les blocs\n",
+    "    df_final = pd.concat(blocs, ignore_index=True)\n",
+    "\n",
+    "    # Ordre logique des colonnes\n",
+    "    df_final = df_final[[\"entite\", \"annee\", \"section\", \"R\", \"C\",\n",
+    "                          \"valeur\", \"unite\", \"valeur_final\"]]\n",
+    "\n",
+    "    # Tri pour lisibilité — on force les colonnes de tri en str pour éviter TypeError\n",
+    "    df_final[\"annee\"]   = df_final[\"annee\"].astype(int)\n",
+    "    df_final[\"entite\"]  = df_final[\"entite\"].astype(str)\n",
+    "    df_final[\"section\"] = df_final[\"section\"].astype(str)\n",
+    "    df_final[\"R\"]       = df_final[\"R\"].astype(str)\n",
+    "    df_final[\"C\"]       = df_final[\"C\"].astype(str)\n",
+    "\n",
+    "    df_final.sort_values(\n",
+    "        by=[\"annee\", \"entite\", \"section\", \"R\", \"C\"],\n",
+    "        inplace=True,\n",
+    "        ignore_index=True,\n",
+    "    )\n",
+    "\n",
+    "    return df_final\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 32,
+   "id": "8773eec3",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "17:15:29 | INFO     | === Démarrage consolidation QRT ===\n",
+      "17:15:29 | INFO     | Dossier source : c:\\Users\\aiab\\Downloads\\Travaux\\SFCR_extraction_tool\\04 - Outputs\n",
+      "17:15:29 | INFO     | Étape 1 — 213 fichier(s) .xlsx trouvé(s)\n",
+      "17:15:29 | INFO     |   Lecture : 2023/CNP_Assurances/Rapport_S.02_page_135.xlsx → section S02\n",
+      "17:15:29 | INFO     |   Lecture : 2023/CNP_Assurances/Rapport_S.02_page_136.xlsx → section S02\n",
+      "17:15:29 | INFO     |   Lecture : 2023/CNP_Assurances/Rapport_S.05_page_138.xlsx → section S05\n",
+      "17:15:29 | INFO     |   Lecture : 2023/Groupama_Gan_Vie/Rapport_S.19_page_78.xlsx → section S19\n",
+      "17:15:29 | INFO     |   Lecture : 2024/CNP_Assurances/Rapport_S.02_page_142.xlsx → section S02\n",
+      "17:15:29 | INFO     |   Lecture : 2024/CNP_Assurances/Rapport_S.02_page_143.xlsx → section S02\n",
+      "17:15:29 | INFO     |   Lecture : 2024/CNP_Assurances/Rapport_S.05_page_145.xlsx → section S05\n",
+      "17:15:29 | INFO     |   Lecture : 2024/CNP_Assurances/Rapport_S.05_page_146.xlsx → section S05\n",
+      "17:15:29 | INFO     |   Lecture : 2025/ACM_Vie_SA/Rapport_S.02_page_56.xlsx → section S02\n",
+      "17:15:29 | INFO     |   Lecture : 2025/ACM_Vie_SA/Rapport_S.05_page_57.xlsx → section S05\n",
+      "17:15:29 | INFO     |   Lecture : 2025/ACM_Vie_SA/Rapport_S.05_page_58.xlsx → section S05\n",
+      "17:15:29 | INFO     |   Lecture : 2025/ACM_Vie_SA/Rapport_S.12_page_59.xlsx → section S12\n",
+      "17:15:29 | INFO     |   Lecture : 2025/ACM_Vie_SA/Rapport_S.22_page_60.xlsx → section S22\n",
+      "17:15:29 | INFO     |   Lecture : 2025/ACM_Vie_SA/Rapport_S.23_page_61.xlsx → section S23\n",
+      "17:15:29 | INFO     |   Lecture : 2025/ACM_Vie_SA/Rapport_S.23_page_62.xlsx → section S23\n",
+      "17:15:29 | INFO     |   Lecture : 2025/ACM_Vie_SA/Rapport_S.25_page_63.xlsx → section S25\n",
+      "17:15:29 | INFO     |   Lecture : 2025/ACM_Vie_SA/Rapport_S.28_page_64.xlsx → section S28\n",
+      "17:15:29 | INFO     |   Lecture : 2025/ACM_Vie_SAM/Rapport_S.02_page_54.xlsx → section S02\n",
+      "17:15:29 | INFO     |   Lecture : 2025/ACM_Vie_SAM/Rapport_S.05_page_55.xlsx → section S05\n",
+      "17:15:29 | INFO     |   Lecture : 2025/ACM_Vie_SAM/Rapport_S.05_page_56.xlsx → section S05\n",
+      "17:15:29 | INFO     |   Lecture : 2025/ACM_Vie_SAM/Rapport_S.12_page_57.xlsx → section S12\n",
+      "17:15:29 | INFO     |   Lecture : 2025/ACM_Vie_SAM/Rapport_S.22_page_58.xlsx → section S22\n",
+      "17:15:29 | INFO     |   Lecture : 2025/ACM_Vie_SAM/Rapport_S.23_page_59.xlsx → section S23\n",
+      "17:15:29 | INFO     |   Lecture : 2025/ACM_Vie_SAM/Rapport_S.23_page_60.xlsx → section S23\n",
+      "17:15:29 | INFO     |   Lecture : 2025/ACM_Vie_SAM/Rapport_S.25_page_61.xlsx → section S25\n",
+      "17:15:29 | INFO     |   Lecture : 2025/ACM_Vie_SAM/Rapport_S.28_page_62.xlsx → section S28\n",
+      "17:15:29 | INFO     |   Lecture : 2025/Allianz_Vie/Rapport_S.02_page_81.xlsx → section S02\n",
+      "17:15:29 | INFO     |   Lecture : 2025/Allianz_Vie/Rapport_S.02_page_82.xlsx → section S02\n",
+      "17:15:29 | INFO     |   Lecture : 2025/Allianz_Vie/Rapport_S.05_page_84.xlsx → section S05\n",
+      "17:15:30 | WARNING  |   Fichier vide après filtrage R/C : Rapport_S.05_page_84.xlsx\n",
+      "17:15:30 | INFO     |   Lecture : 2025/Allianz_Vie/Rapport_S.12_page_85.xlsx → section S12\n",
+      "17:15:30 | INFO     |   Lecture : 2025/Allianz_Vie/Rapport_S.12_page_86.xlsx → section S12\n",
+      "17:15:30 | INFO     |   Lecture : 2025/Allianz_Vie/Rapport_S.22_page_87.xlsx → section S22\n",
+      "17:15:30 | INFO     |   Lecture : 2025/Allianz_Vie/Rapport_S.23_page_88.xlsx → section S23\n",
+      "17:15:30 | INFO     |   Lecture : 2025/Allianz_Vie/Rapport_S.23_page_89.xlsx → section S23\n",
+      "17:15:30 | INFO     |   Lecture : 2025/Allianz_Vie/Rapport_S.25_page_90.xlsx → section S25\n",
+      "17:15:30 | INFO     |   Lecture : 2025/Allianz_Vie/Rapport_S.25_page_91.xlsx → section S25\n",
+      "17:15:30 | INFO     |   Lecture : 2025/Allianz_Vie/Rapport_S.28_page_92.xlsx → section S28\n",
+      "17:15:30 | INFO     |   Lecture : 2025/Caisse_Générale_de_Prévoyance_(CGP)/Rapport_S.02_page_48.xlsx → section S02\n",
+      "17:15:30 | INFO     |   Lecture : 2025/Caisse_Générale_de_Prévoyance_(CGP)/Rapport_S.02_page_49.xlsx → section S02\n",
+      "17:15:30 | INFO     |   Lecture : 2025/Caisse_Générale_de_Prévoyance_(CGP)/Rapport_S.05_page_50.xlsx → section S05\n",
+      "17:15:30 | INFO     |   Lecture : 2025/Caisse_Générale_de_Prévoyance_(CGP)/Rapport_S.05_page_51.xlsx → section S05\n",
+      "17:15:30 | INFO     |   Lecture : 2025/Caisse_Générale_de_Prévoyance_(CGP)/Rapport_S.12_page_52.xlsx → section S12\n",
+      "17:15:30 | INFO     |   Lecture : 2025/Caisse_Générale_de_Prévoyance_(CGP)/Rapport_S.22_page_55.xlsx → section S22\n",
+      "17:15:30 | INFO     |   Lecture : 2025/Caisse_Générale_de_Prévoyance_(CGP)/Rapport_S.23_page_56.xlsx → section S23\n",
+      "17:15:30 | INFO     |   Lecture : 2025/Caisse_Générale_de_Prévoyance_(CGP)/Rapport_S.23_page_57.xlsx → section S23\n",
+      "17:15:30 | INFO     |   Lecture : 2025/Caisse_Générale_de_Prévoyance_(CGP)/Rapport_S.25_page_58.xlsx → section S25\n",
+      "17:15:30 | INFO     |   Lecture : 2025/Caisse_Générale_de_Prévoyance_(CGP)/Rapport_S.28_page_59.xlsx → section S28\n",
+      "17:15:30 | WARNING  |   Ignoré (nom non reconnu) : ~$Rapport_S.28_page_59.xlsx\n",
+      "17:15:30 | INFO     |   Lecture : 2025/Cardif_Assurance_Vie/Rapport_S.02_page_2.xlsx → section S02\n",
+      "17:15:30 | INFO     |   Lecture : 2025/Cardif_Assurance_Vie/Rapport_S.02_page_3.xlsx → section S02\n",
+      "17:15:30 | INFO     |   Lecture : 2025/Cardif_Assurance_Vie/Rapport_S.05_page_6.xlsx → section S05\n",
+      "17:15:30 | INFO     |   Lecture : 2025/Cardif_Assurance_Vie/Rapport_S.12_page_7.xlsx → section S12\n",
+      "17:15:30 | INFO     |   Lecture : 2025/Cardif_Assurance_Vie/Rapport_S.22_page_11.xlsx → section S22\n",
+      "17:15:30 | INFO     |   Lecture : 2025/Cardif_Assurance_Vie/Rapport_S.23_page_12.xlsx → section S23\n",
+      "17:15:30 | INFO     |   Lecture : 2025/Cardif_Assurance_Vie/Rapport_S.25_page_13.xlsx → section S25\n",
+      "17:15:30 | INFO     |   Lecture : 2025/Cardif_Assurance_Vie/Rapport_S.28_page_14.xlsx → section S28\n",
+      "17:15:30 | INFO     |   Lecture : 2025/CNP_Assurances/Rapport_S.02_page_83.xlsx → section S02\n",
+      "17:15:30 | WARNING  |   Fichier vide après filtrage R/C : Rapport_S.02_page_83.xlsx\n",
+      "17:15:30 | INFO     |   Lecture : 2025/CNP_Assurances/Rapport_S.02_page_84.xlsx → section S02\n",
+      "17:15:30 | WARNING  |   Fichier vide après filtrage R/C : Rapport_S.02_page_84.xlsx\n",
+      "17:15:30 | INFO     |   Lecture : 2025/CNP_Assurances/Rapport_S.05_page_86.xlsx → section S05\n",
+      "17:15:30 | INFO     |   Lecture : 2025/CNP_Assurances/Rapport_S.05_page_87.xlsx → section S05\n",
+      "17:15:30 | INFO     |   Lecture : 2025/CNP_Assurances/Rapport_S.12_page_88.xlsx → section S12\n",
+      "17:15:30 | INFO     |   Lecture : 2025/CNP_Assurances/Rapport_S.12_page_89.xlsx → section S12\n",
+      "17:15:30 | WARNING  |   Fichier vide après filtrage R/C : Rapport_S.12_page_89.xlsx\n",
+      "17:15:30 | INFO     |   Lecture : 2025/CNP_Assurances/Rapport_S.12_page_90.xlsx → section S12\n",
+      "17:15:30 | INFO     |   Lecture : 2025/CNP_Assurances/Rapport_S.17_page_91.xlsx → section S17\n",
+      "17:15:30 | INFO     |   Lecture : 2025/CNP_Assurances/Rapport_S.19_page_92.xlsx → section S19\n",
+      "17:15:30 | INFO     |   Lecture : 2025/CNP_Assurances/Rapport_S.19_page_93.xlsx → section S19\n",
+      "17:15:30 | INFO     |   Lecture : 2025/CNP_Assurances/Rapport_S.22_page_94.xlsx → section S22\n",
+      "17:15:30 | INFO     |   Lecture : 2025/CNP_Assurances/Rapport_S.23_page_95.xlsx → section S23\n",
+      "17:15:30 | INFO     |   Lecture : 2025/CNP_Assurances/Rapport_S.23_page_96.xlsx → section S23\n",
+      "17:15:30 | INFO     |   Lecture : 2025/CNP_Assurances/Rapport_S.25_page_97.xlsx → section S25\n",
+      "17:15:30 | INFO     |   Lecture : 2025/CNP_Assurances/Rapport_S.28_page_98.xlsx → section S28\n",
+      "17:15:30 | INFO     |   Lecture : 2025/CNP_Assurances/Rapport_S.28_page_99.xlsx → section S28\n",
+      "17:15:30 | INFO     |   Lecture : 2025/GMF_Vie/Rapport_S.02_page_57.xlsx → section S02\n",
+      "17:15:30 | INFO     |   Lecture : 2025/GMF_Vie/Rapport_S.02_page_58.xlsx → section S02\n",
+      "17:15:30 | INFO     |   Lecture : 2025/GMF_Vie/Rapport_S.05_page_59.xlsx → section S05\n",
+      "17:15:30 | INFO     |   Lecture : 2025/GMF_Vie/Rapport_S.12_page_60.xlsx → section S12\n",
+      "17:15:30 | INFO     |   Lecture : 2025/GMF_Vie/Rapport_S.22_page_62.xlsx → section S22\n",
+      "17:15:30 | INFO     |   Lecture : 2025/GMF_Vie/Rapport_S.23_page_63.xlsx → section S23\n",
+      "17:15:30 | INFO     |   Lecture : 2025/GMF_Vie/Rapport_S.23_page_64.xlsx → section S23\n",
+      "17:15:30 | INFO     |   Lecture : 2025/GMF_Vie/Rapport_S.25_page_65.xlsx → section S25\n",
+      "17:15:30 | INFO     |   Lecture : 2025/GMF_Vie/Rapport_S.25_page_66.xlsx → section S25\n",
+      "17:15:30 | INFO     |   Lecture : 2025/Groupama_Gan_Vie/Rapport_S.02_page_71.xlsx → section S02\n",
+      "17:15:30 | INFO     |   Lecture : 2025/Groupama_Gan_Vie/Rapport_S.02_page_72.xlsx → section S02\n",
+      "17:15:30 | INFO     |   Lecture : 2025/Groupama_Gan_Vie/Rapport_S.05_page_73.xlsx → section S05\n",
+      "17:15:30 | INFO     |   Lecture : 2025/Groupama_Gan_Vie/Rapport_S.05_page_74.xlsx → section S05\n",
+      "17:15:30 | INFO     |   Lecture : 2025/Groupama_Gan_Vie/Rapport_S.12_page_75.xlsx → section S12\n",
+      "17:15:30 | INFO     |   Lecture : 2025/Groupama_Gan_Vie/Rapport_S.22_page_78.xlsx → section S22\n",
+      "17:15:30 | INFO     |   Lecture : 2025/Groupama_Gan_Vie/Rapport_S.23_page_80.xlsx → section S23\n",
+      "17:15:30 | INFO     |   Lecture : 2025/Groupama_Gan_Vie/Rapport_S.25_page_81.xlsx → section S25\n",
+      "17:15:30 | INFO     |   Lecture : 2025/Groupama_Gan_Vie/Rapport_S.28_page_82.xlsx → section S28\n",
+      "17:15:30 | INFO     |   Lecture : 2025/HSBC_Assurances_Vie/Rapport_S.02_page_76.xlsx → section S02\n",
+      "17:15:30 | INFO     |   Lecture : 2025/HSBC_Assurances_Vie/Rapport_S.02_page_77.xlsx → section S02\n",
+      "17:15:30 | WARNING  |   Fichier vide après filtrage R/C : Rapport_S.02_page_77.xlsx\n",
+      "17:15:30 | INFO     |   Lecture : 2025/HSBC_Assurances_Vie/Rapport_S.05_page_78.xlsx → section S05\n",
+      "17:15:30 | INFO     |   Lecture : 2025/HSBC_Assurances_Vie/Rapport_S.12_page_79.xlsx → section S12\n",
+      "17:15:30 | INFO     |   Lecture : 2025/HSBC_Assurances_Vie/Rapport_S.22_page_80.xlsx → section S22\n",
+      "17:15:30 | INFO     |   Lecture : 2025/HSBC_Assurances_Vie/Rapport_S.23_page_81.xlsx → section S23\n",
+      "17:15:30 | INFO     |   Lecture : 2025/HSBC_Assurances_Vie/Rapport_S.25_page_82.xlsx → section S25\n",
+      "17:15:30 | INFO     |   Lecture : 2025/HSBC_Assurances_Vie/Rapport_S.28_page_83.xlsx → section S28\n",
+      "17:15:30 | INFO     |   Lecture : 2025/La_Mondiale_/Rapport_S.02_page_122.xlsx → section S02\n",
+      "17:15:30 | INFO     |   Lecture : 2025/La_Mondiale_/Rapport_S.02_page_123.xlsx → section S02\n",
+      "17:15:30 | INFO     |   Lecture : 2025/La_Mondiale_/Rapport_S.05_page_124.xlsx → section S05\n",
+      "17:15:30 | INFO     |   Lecture : 2025/La_Mondiale_/Rapport_S.05_page_125.xlsx → section S05\n",
+      "17:15:30 | INFO     |   Lecture : 2025/La_Mondiale_/Rapport_S.05_page_126.xlsx → section S05\n",
+      "17:15:30 | INFO     |   Lecture : 2025/La_Mondiale_/Rapport_S.05_page_127.xlsx → section S05\n",
+      "17:15:30 | INFO     |   Lecture : 2025/La_Mondiale_/Rapport_S.22_page_128.xlsx → section S22\n",
+      "17:15:30 | INFO     |   Lecture : 2025/La_Mondiale_/Rapport_S.23_page_129.xlsx → section S23\n",
+      "17:15:30 | INFO     |   Lecture : 2025/La_Mondiale_/Rapport_S.23_page_130.xlsx → section S23\n",
+      "17:15:30 | INFO     |   Lecture : 2025/La_Mondiale_/Rapport_S.23_page_131.xlsx → section S23\n",
+      "17:15:30 | INFO     |   Lecture : 2025/La_Mondiale_/Rapport_S.23_page_132.xlsx → section S23\n",
+      "17:15:30 | INFO     |   Lecture : 2025/La_Mondiale_/Rapport_S.25_page_133.xlsx → section S25\n",
+      "17:15:30 | INFO     |   Lecture : 2025/La_Mondiale_/Rapport_S.25_page_134.xlsx → section S25\n",
+      "17:15:30 | INFO     |   Lecture : 2025/La_Mondiale_/Rapport_S.28_page_253.xlsx → section S28\n",
+      "17:15:30 | INFO     |   Lecture : 2025/La_Mondiale_/Rapport_S.28_page_254.xlsx → section S28\n",
+      "17:15:30 | INFO     |   Lecture : 2025/La_Mondiale_Partenaire/Rapport_S.02_page_273.xlsx → section S02\n",
+      "17:15:30 | INFO     |   Lecture : 2025/La_Mondiale_Partenaire/Rapport_S.02_page_274.xlsx → section S02\n",
+      "17:15:30 | INFO     |   Lecture : 2025/La_Mondiale_Partenaire/Rapport_S.05_page_275.xlsx → section S05\n",
+      "17:15:30 | INFO     |   Lecture : 2025/La_Mondiale_Partenaire/Rapport_S.05_page_276.xlsx → section S05\n",
+      "17:15:30 | INFO     |   Lecture : 2025/La_Mondiale_Partenaire/Rapport_S.12_page_277.xlsx → section S12\n",
+      "17:15:30 | INFO     |   Lecture : 2025/La_Mondiale_Partenaire/Rapport_S.12_page_278.xlsx → section S12\n",
+      "17:15:30 | INFO     |   Lecture : 2025/La_Mondiale_Partenaire/Rapport_S.22_page_279.xlsx → section S22\n",
+      "17:15:30 | INFO     |   Lecture : 2025/La_Mondiale_Partenaire/Rapport_S.23_page_280.xlsx → section S23\n",
+      "17:15:30 | INFO     |   Lecture : 2025/La_Mondiale_Partenaire/Rapport_S.23_page_281.xlsx → section S23\n",
+      "17:15:30 | INFO     |   Lecture : 2025/La_Mondiale_Partenaire/Rapport_S.23_page_282.xlsx → section S23\n",
+      "17:15:30 | INFO     |   Lecture : 2025/La_Mondiale_Partenaire/Rapport_S.25_page_283.xlsx → section S25\n",
+      "17:15:30 | INFO     |   Lecture : 2025/La_Mondiale_Partenaire/Rapport_S.25_page_284.xlsx → section S25\n",
+      "17:15:30 | INFO     |   Lecture : 2025/La_Mondiale_Partenaire/Rapport_S.28_page_285.xlsx → section S28\n",
+      "17:15:30 | INFO     |   Lecture : 2025/La_Mondiale_Partenaire/Rapport_S.28_page_286.xlsx → section S28\n",
+      "17:15:30 | INFO     |   Lecture : 2025/MACSF_Epargne_Retraite/Rapport_S.02_page_57.xlsx → section S02\n",
+      "17:15:30 | INFO     |   Lecture : 2025/MACSF_Epargne_Retraite/Rapport_S.02_page_58.xlsx → section S02\n",
+      "17:15:30 | WARNING  |   Fichier vide après filtrage R/C : Rapport_S.02_page_58.xlsx\n",
+      "17:15:30 | INFO     |   Lecture : 2025/MACSF_Epargne_Retraite/Rapport_S.05_page_59.xlsx → section S05\n",
+      "17:15:31 | INFO     |   Lecture : 2025/MACSF_Epargne_Retraite/Rapport_S.05_page_60.xlsx → section S05\n",
+      "17:15:31 | INFO     |   Lecture : 2025/MACSF_Epargne_Retraite/Rapport_S.05_page_61.xlsx → section S05\n",
+      "17:15:31 | INFO     |   Lecture : 2025/MACSF_Epargne_Retraite/Rapport_S.12_page_62.xlsx → section S12\n",
+      "17:15:31 | INFO     |   Lecture : 2025/MACSF_Epargne_Retraite/Rapport_S.12_page_63.xlsx → section S12\n",
+      "17:15:31 | INFO     |   Lecture : 2025/MACSF_Epargne_Retraite/Rapport_S.22_page_64.xlsx → section S22\n",
+      "17:15:31 | INFO     |   Lecture : 2025/MACSF_Epargne_Retraite/Rapport_S.23_page_65.xlsx → section S23\n",
+      "17:15:31 | INFO     |   Lecture : 2025/MACSF_Epargne_Retraite/Rapport_S.25_page_66.xlsx → section S25\n",
+      "17:15:31 | INFO     |   Lecture : 2025/MACSF_Epargne_Retraite/Rapport_S.28_page_67.xlsx → section S28\n",
+      "17:15:31 | WARNING  |   Ignoré (nom non reconnu) : ~$Rapport_S.02_page_58.xlsx\n",
+      "17:15:31 | INFO     |   Lecture : 2025/MAIF_Vie/Rapport_S.02_page_103.xlsx → section S02\n",
+      "17:15:31 | INFO     |   Lecture : 2025/MAIF_Vie/Rapport_S.02_page_104.xlsx → section S02\n",
+      "17:15:31 | INFO     |   Lecture : 2025/MAIF_Vie/Rapport_S.05_page_105.xlsx → section S05\n",
+      "17:15:31 | ERROR    |   Erreur lecture Rapport_S.05_page_105.xlsx : Can only use .str accessor with string values, not integer\n",
+      "17:15:31 | INFO     |   Lecture : 2025/MAIF_Vie/Rapport_S.05_page_106.xlsx → section S05\n",
+      "17:15:31 | ERROR    |   Erreur lecture Rapport_S.05_page_106.xlsx : Can only use .str accessor with string values, not integer\n",
+      "17:15:31 | INFO     |   Lecture : 2025/MAIF_Vie/Rapport_S.05_page_107.xlsx → section S05\n",
+      "17:15:31 | INFO     |   Lecture : 2025/MAIF_Vie/Rapport_S.12_page_108.xlsx → section S12\n",
+      "17:15:31 | INFO     |   Lecture : 2025/MAIF_Vie/Rapport_S.22_page_109.xlsx → section S22\n",
+      "17:15:31 | INFO     |   Lecture : 2025/MAIF_Vie/Rapport_S.23_page_110.xlsx → section S23\n",
+      "17:15:31 | INFO     |   Lecture : 2025/MAIF_Vie/Rapport_S.23_page_111.xlsx → section S23\n",
+      "17:15:31 | INFO     |   Lecture : 2025/MAIF_Vie/Rapport_S.25_page_112.xlsx → section S25\n",
+      "17:15:31 | INFO     |   Lecture : 2025/MAIF_Vie/Rapport_S.28_page_114.xlsx → section S28\n",
+      "17:15:31 | INFO     |   Lecture : 2025/PREDICA/Rapport_S.02_page_52.xlsx → section S02\n",
+      "17:15:31 | INFO     |   Lecture : 2025/PREDICA/Rapport_S.02_page_53.xlsx → section S02\n",
+      "17:15:31 | INFO     |   Lecture : 2025/PREDICA/Rapport_S.05_page_55.xlsx → section S05\n",
+      "17:15:31 | INFO     |   Lecture : 2025/PREDICA/Rapport_S.05_page_56.xlsx → section S05\n",
+      "17:15:31 | INFO     |   Lecture : 2025/PREDICA/Rapport_S.05_page_57.xlsx → section S05\n",
+      "17:15:31 | INFO     |   Lecture : 2025/PREDICA/Rapport_S.05_page_58.xlsx → section S05\n",
+      "17:15:31 | INFO     |   Lecture : 2025/PREDICA/Rapport_S.05_page_59.xlsx → section S05\n",
+      "17:15:31 | INFO     |   Lecture : 2025/PREDICA/Rapport_S.12_page_60.xlsx → section S12\n",
+      "17:15:31 | INFO     |   Lecture : 2025/PREDICA/Rapport_S.12_page_61.xlsx → section S12\n",
+      "17:15:31 | INFO     |   Lecture : 2025/PREDICA/Rapport_S.22_page_67.xlsx → section S22\n",
+      "17:15:31 | INFO     |   Lecture : 2025/PREDICA/Rapport_S.23_page_68.xlsx → section S23\n",
+      "17:15:31 | INFO     |   Lecture : 2025/PREDICA/Rapport_S.23_page_70.xlsx → section S23\n",
+      "17:15:31 | INFO     |   Lecture : 2025/PREDICA/Rapport_S.25_page_71.xlsx → section S25\n",
+      "17:15:31 | INFO     |   Lecture : 2025/PREDICA/Rapport_S.28_page_72.xlsx → section S28\n",
+      "17:15:31 | INFO     |   Lecture : 2025/PREDICA/Rapport_S.28_page_73.xlsx → section S28\n",
+      "17:15:31 | INFO     |   Lecture : 2025/SOGECAP/Rapport_S.02_page_32.xlsx → section S02\n",
+      "17:15:31 | INFO     |   Lecture : 2025/SOGECAP/Rapport_S.05_page_33.xlsx → section S05\n",
+      "17:15:31 | INFO     |   Lecture : 2025/SOGECAP/Rapport_S.05_page_34.xlsx → section S05\n",
+      "17:15:31 | INFO     |   Lecture : 2025/SOGECAP/Rapport_S.12_page_36.xlsx → section S12\n",
+      "17:15:31 | INFO     |   Lecture : 2025/SOGECAP/Rapport_S.22_page_39.xlsx → section S22\n",
+      "17:15:31 | INFO     |   Lecture : 2025/SOGECAP/Rapport_S.23_page_39.xlsx → section S23\n",
+      "17:15:31 | INFO     |   Lecture : 2025/SOGECAP/Rapport_S.23_page_40.xlsx → section S23\n",
+      "17:15:31 | INFO     |   Lecture : 2025/SOGECAP/Rapport_S.25_page_40.xlsx → section S25\n",
+      "17:15:31 | INFO     |   Lecture : 2025/SOGECAP/Rapport_S.28_page_41.xlsx → section S28\n",
+      "17:15:31 | INFO     |   Lecture : 2025/SPIRICA/Rapport_S.02_page_54.xlsx → section S02\n",
+      "17:15:31 | INFO     |   Lecture : 2025/SPIRICA/Rapport_S.02_page_55.xlsx → section S02\n",
+      "17:15:31 | INFO     |   Lecture : 2025/SPIRICA/Rapport_S.05_page_56.xlsx → section S05\n",
+      "17:15:31 | INFO     |   Lecture : 2025/SPIRICA/Rapport_S.05_page_57.xlsx → section S05\n",
+      "17:15:31 | INFO     |   Lecture : 2025/SPIRICA/Rapport_S.12_page_58.xlsx → section S12\n",
+      "17:15:31 | INFO     |   Lecture : 2025/SPIRICA/Rapport_S.12_page_59.xlsx → section S12\n",
+      "17:15:31 | INFO     |   Lecture : 2025/SPIRICA/Rapport_S.22_page_60.xlsx → section S22\n",
+      "17:15:31 | INFO     |   Lecture : 2025/SPIRICA/Rapport_S.23_page_61.xlsx → section S23\n",
+      "17:15:31 | INFO     |   Lecture : 2025/SPIRICA/Rapport_S.23_page_62.xlsx → section S23\n",
+      "17:15:31 | INFO     |   Lecture : 2025/SPIRICA/Rapport_S.25_page_63.xlsx → section S25\n",
+      "17:15:31 | INFO     |   Lecture : 2025/SPIRICA/Rapport_S.25_page_64.xlsx → section S25\n",
+      "17:15:31 | WARNING  |   Fichier vide après filtrage R/C : Rapport_S.25_page_64.xlsx\n",
+      "17:15:31 | INFO     |   Lecture : 2025/SPIRICA/Rapport_S.28_page_65.xlsx → section S28\n",
+      "17:15:31 | INFO     |   Lecture : 2025/SPIRICA/Rapport_S.28_page_66.xlsx → section S28\n",
+      "17:15:31 | WARNING  |   Ignoré (nom non reconnu) : ~$Rapport_S.25_page_64.xlsx\n",
+      "17:15:31 | INFO     |   Lecture : 2025/SURAVENIR/Rapport_S.02_page_70.xlsx → section S02\n",
+      "17:15:31 | INFO     |   Lecture : 2025/SURAVENIR/Rapport_S.02_page_71.xlsx → section S02\n",
+      "17:15:31 | INFO     |   Lecture : 2025/SURAVENIR/Rapport_S.05_page_72.xlsx → section S05\n",
+      "17:15:31 | INFO     |   Lecture : 2025/SURAVENIR/Rapport_S.12_page_73.xlsx → section S12\n",
+      "17:15:31 | INFO     |   Lecture : 2025/SURAVENIR/Rapport_S.22_page_74.xlsx → section S22\n",
+      "17:15:31 | INFO     |   Lecture : 2025/SURAVENIR/Rapport_S.23_page_75.xlsx → section S23\n",
+      "17:15:31 | INFO     |   Lecture : 2025/SURAVENIR/Rapport_S.25_page_76.xlsx → section S25\n",
+      "17:15:31 | INFO     |   Lecture : 2025/SURAVENIR/Rapport_S.28_page_77.xlsx → section S28\n",
+      "17:15:31 | INFO     |   Lecture : 2025/SURAVENIR_SA/Rapport_S.02_page_72.xlsx → section S02\n",
+      "17:15:31 | INFO     |   Lecture : 2025/SURAVENIR_SA/Rapport_S.02_page_73.xlsx → section S02\n",
+      "17:15:31 | INFO     |   Lecture : 2025/SURAVENIR_SA/Rapport_S.05_page_74.xlsx → section S05\n",
+      "17:15:31 | INFO     |   Lecture : 2025/SURAVENIR_SA/Rapport_S.12_page_75.xlsx → section S12\n",
+      "17:15:31 | INFO     |   Lecture : 2025/SURAVENIR_SA/Rapport_S.23_page_77.xlsx → section S23\n",
+      "17:15:31 | INFO     |   Lecture : 2025/SURAVENIR_SA/Rapport_S.23_page_78.xlsx → section S23\n",
+      "17:15:31 | INFO     |   Lecture : 2025/SURAVENIR_SA/Rapport_S.25_page_79.xlsx → section S25\n",
+      "17:15:31 | INFO     |   Lecture : 2025/SURAVENIR_SA/Rapport_S.28_page_80.xlsx → section S28\n",
+      "17:15:31 | INFO     |   Lecture : 2025/SURAVENIR_SA/Rapport_S.28_page_81.xlsx → section S28\n",
+      "17:15:31 | INFO     |   Lecture : 2025/SwissLife_Assurance_et_Patrimoine/Rapport_S.02_page_100.xlsx → section S02\n",
+      "17:15:31 | INFO     |   Lecture : 2025/SwissLife_Assurance_et_Patrimoine/Rapport_S.02_page_99.xlsx → section S02\n",
+      "17:15:31 | INFO     |   Lecture : 2025/SwissLife_Assurance_et_Patrimoine/Rapport_S.05_page_101.xlsx → section S05\n",
+      "17:15:31 | INFO     |   Lecture : 2025/SwissLife_Assurance_et_Patrimoine/Rapport_S.12_page_102.xlsx → section S12\n",
+      "17:15:31 | INFO     |   Lecture : 2025/SwissLife_Assurance_et_Patrimoine/Rapport_S.12_page_103.xlsx → section S12\n",
+      "17:15:31 | INFO     |   Lecture : 2025/SwissLife_Assurance_et_Patrimoine/Rapport_S.22_page_104.xlsx → section S22\n",
+      "17:15:31 | INFO     |   Lecture : 2025/SwissLife_Assurance_et_Patrimoine/Rapport_S.25_page_106.xlsx → section S25\n",
+      "17:15:31 | INFO     |   Lecture : 2025/SwissLife_Assurance_et_Patrimoine/Rapport_S.28_page_107.xlsx → section S28\n",
+      "17:15:31 | INFO     | Étape 4 — 201 fichier(s) consolidé(s) | 3 ignoré(s) | 9 erreur(s)\n",
+      "17:15:32 | INFO     | ============================================================\n",
+      "17:15:32 | INFO     | RÉSUMÉ DE LA CONSOLIDATION\n",
+      "17:15:32 | INFO     |   Fichier exporté    : base_consolidee_QRT.xlsx\n",
+      "17:15:32 | INFO     |   Lignes totales     : 4,740\n",
+      "17:15:32 | INFO     |   Entités            : ['ACM_Vie_SA', 'ACM_Vie_SAM', 'Allianz_Vie', 'CNP_Assurances', 'Caisse_Générale_de_Prévoyance_(CGP)', 'Cardif_Assurance_Vie', 'GMF_Vie', 'Groupama_Gan_Vie', 'HSBC_Assurances_Vie', 'La_Mondiale_', 'La_Mondiale_Partenaire', 'MACSF_Epargne_Retraite', 'MAIF_Vie', 'PREDICA', 'SOGECAP', 'SPIRICA', 'SURAVENIR', 'SURAVENIR_SA', 'SwissLife_Assurance_et_Patrimoine']\n",
+      "17:15:32 | INFO     |   Années             : [2023, 2024, 2025]\n",
+      "17:15:32 | INFO     |   Sections           : ['S02', 'S05', 'S12', 'S17', 'S19', 'S22', 'S23', 'S25', 'S28']\n",
+      "17:15:32 | INFO     |   Unités trouvées    : ['EUR', 'KEUR', 'MEUR', 'nan']\n",
+      "17:15:32 | INFO     |   Codes R uniques    : 120\n",
+      "17:15:32 | INFO     |   Codes C uniques    : 40\n",
+      "17:15:32 | INFO     | ============================================================\n",
+      "17:15:32 | INFO     | Terminé en 2.93s\n"
+     ]
+    }
+   ],
+   "source": [
+    "# =============================================================================\n",
+    "# ÉTAPE 5 — Export de la grande base + résumé\n",
+    "# =============================================================================\n",
+    "\n",
+    "def exporter(df: pd.DataFrame, chemin_sortie: Path) -> None:\n",
+    "    \"\"\"\n",
+    "    Sauvegarde la base consolidée en Excel et affiche un résumé.\n",
+    "    \"\"\"\n",
+    "    if df.empty:\n",
+    "        log.error(\"DataFrame vide — export annulé.\")\n",
+    "        return\n",
+    "\n",
+    "    # Écriture Excel avec mise en forme légère\n",
+    "    with pd.ExcelWriter(chemin_sortie, engine=\"openpyxl\") as writer:\n",
+    "        df.to_excel(writer, index=False, sheet_name=\"Base_QRT\")\n",
+    "\n",
+    "        # Ajustement automatique de la largeur des colonnes\n",
+    "        ws = writer.sheets[\"Base_QRT\"]\n",
+    "        for col in ws.columns:\n",
+    "            max_len = max(len(str(cell.value or \"\")) for cell in col)\n",
+    "            ws.column_dimensions[col[0].column_letter].width = max_len + 4\n",
+    "\n",
+    "    # Résumé console — on force str pour éviter TypeError sur sorted()\n",
+    "    log.info(\"=\" * 60)\n",
+    "    log.info(\"RÉSUMÉ DE LA CONSOLIDATION\")\n",
+    "    log.info(f\"  Fichier exporté    : {chemin_sortie.name}\")\n",
+    "    log.info(f\"  Lignes totales     : {len(df):,}\")\n",
+    "    log.info(f\"  Entités            : {sorted(df['entite'].astype(str).unique())}\")\n",
+    "    log.info(f\"  Années             : {sorted(df['annee'].astype(int).unique())}\")\n",
+    "    log.info(f\"  Sections           : {sorted(df['section'].astype(str).unique())}\")\n",
+    "    log.info(f\"  Unités trouvées    : {sorted(df['unite'].astype(str).unique())}\")\n",
+    "    log.info(f\"  Codes R uniques    : {df['R'].nunique()}\")\n",
+    "    log.info(f\"  Codes C uniques    : {df['C'].nunique()}\")\n",
+    "    log.info(\"=\" * 60)\n",
+    "\n",
+    "\n",
+    "# =============================================================================\n",
+    "# POINT D'ENTRÉE PRINCIPAL\n",
+    "# =============================================================================\n",
+    "\n",
+    "if __name__ == \"__main__\":\n",
+    "    debut = datetime.now()\n",
+    "    log.info(\"=== Démarrage consolidation QRT ===\")\n",
+    "    log.info(f\"Dossier source : {OUTPUTS_DIR}\")\n",
+    "\n",
+    "    # Étape 1 — Scan\n",
+    "    fichiers = scanner_fichiers(OUTPUTS_DIR)\n",
+    "\n",
+    "    if not fichiers:\n",
+    "        log.error(\"Aucun fichier trouvé. Vérifiez le dossier 04 - Outputs/\")\n",
+    "    else:\n",
+    "        # Étapes 2, 3, 4 — Lecture + consolidation\n",
+    "        df_final = consolider(fichiers)\n",
+    "\n",
+    "        # Étape 5 — Export\n",
+    "        exporter(df_final, BASE_FINALE)\n",
+    "\n",
+    "    duree = (datetime.now() - debut).total_seconds()\n",
+    "    log.info(f\"Terminé en {duree:.2f}s\")\n"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "SFCR_agents",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.15"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}

+ 31 - 29
03 - Scripts/clean_DBSCAN.py

@@ -2,57 +2,59 @@
 import json
 
 
-def get_closest_col(x, current_cols):
-    """Trouve la colonne dont l'X est le plus proche de la valeur."""
-    return min(current_cols, key=lambda c: abs(x - c[0]))
+def get_closest_col(x, current_cols, max_distance=8.0):
+    """Return the column nearest to ``x``, or None if it is too far away.
+
+    Args:
+        x: X coordinate of the value to place.
+        current_cols: Sequence of column entries whose item at index 0 is
+            the column's X position.
+        max_distance: Largest accepted gap ``abs(x - column_x)``; the
+            original docstring describes it as PDF units.
+
+    Returns:
+        The closest entry of ``current_cols``, or None when even the
+        closest one is farther than ``max_distance``.
+    """
+    # NOTE(review): min() raises ValueError when current_cols is empty, so
+    # callers must only pass a non-empty sequence.
+    best = min(current_cols, key=lambda c: abs(x - c[0]))
+    if abs(x - best[0]) <= max_distance:
+        return best
+    return None  # No column is close enough
 
 def transform_to_clean_markdown(data):
+    """Render extracted table points as Markdown tables.
+
+    Args:
+        data: Iterable of dict entries; each entry's "points" list holds
+            (x, _, text) triples (the middle item is not used here).
+
+    Returns:
+        One string made of the Markdown table lines joined by newlines,
+        with a "---" separator line between successive tables.
+    """
-    current_cols = [] 
+    current_cols = []
     output_lines = []
-    
+
     for entry in data:
         points = entry.get("points", [])
-        if not points: 
+        if not points:
             continue
-        
-        # 1. Détection et formatage des En-têtes (Cxxxx)
-        headers_in_row = [(p[0], p[2]) for p in points if str(p[2]).startswith('C')]
-        
-        if headers_in_row:
-            # On ajoute un séparateur si un tableau existait déjà avant
+
+        # 1. Header detection: a row is treated as a header row as soon as
+        # AT LEAST one of its texts starts with 'C' (Cxxxx code)
+        # NOTE(review): any text starting with "C" matches, not only Cxxxx
+        # codes — confirm this cannot trigger on ordinary label/data text.
+        has_header = any(str(p[2]).startswith('C') for p in points)
+
+        if has_header:
+            # Separate this table from the one already emitted, if any
             if output_lines:
                 output_lines.append("\n---\n")
-                
-            current_cols = headers_in_row
+
+            # Take ALL points as columns (including 06000, etc.)
+            # except those that look like a row label (Rxxxx)
+            current_cols = [(p[0], p[2]) for p in points if not str(p[2]).startswith('R')]
             col_names = [c[1] for c in current_cols]
-            # Header propre sans coordonnées
+
             output_lines.append(f"| Code | {' | '.join(col_names)} |")
             output_lines.append(f"| :--- | {' | '.join([':---'] * len(col_names))} |")
             continue
 
-        # 2. Détection et alignement des Données (Rxxxx)
+        # 2. Data rows (Rxxxx)
         row_label_pt = next((p for p in points if str(p[2]).startswith('R')), None)
-        
+
         if row_label_pt and current_cols:
             row_label = row_label_pt[2]
-            # Initialisation de la ligne avec des "0"
+            # Every column starts at "0"; matched points overwrite it
+            # NOTE(review): columns sharing the same header text share one
+            # key in this dict — confirm header texts are unique per table.
             row_dict = {c[1]: "0" for c in current_cols}
-            
+
             for p in points:
                 x_val, _, text = p
-                if text == row_label: 
+                if text == row_label:
                     continue
-                
-                # Placement précis via coordonnée X
-                best_col = get_closest_col(x_val, current_cols)
-                row_dict[best_col[1]] = str(text).strip()
-            
-            # Construction de la ligne finale
+
+                best_col = get_closest_col(x_val, current_cols, max_distance=8.0)
+                if best_col:
+                    row_dict[best_col[1]] = str(text).strip()
+                # If no column is close enough → skip it (value outside the table)
+
             ordered_values = [row_dict[c[1]] for c in current_cols]
             output_lines.append(f"| **{row_label}** | {' | '.join(ordered_values)} |")
-            
-        # Note : On ignore volontairement le bloc "else" (INFO) pour nettoyer le bruit
-            
+
     return "\n".join(output_lines)
 
 #print(transform_to_clean_markdown(json))

+ 10 - 9
03 - Scripts/function_exctract.py

@@ -13,17 +13,18 @@ def extract_pdf_path(filename):
     folder_abs = os.path.join(script_dir, "..", "01 - Sources", année_extraction)
     folder_abs = os.path.abspath(folder_abs)
 
-    #print(f"CHEMIN : {folder_abs}")
-    #print(f"EXISTE : {os.path.exists(folder_abs)}")
-    #print(f"CONTENU : {os.listdir(folder_abs) if os.path.exists(folder_abs) else 'DOSSIER INTROUVABLE'}")
-
     all_pdfs = glob.glob(os.path.join(folder_abs, "*.pdf"))
 
     entreprise_lower = entreprise_name.strip().lower()
-    matches = [
-        p for p in all_pdfs
-        if os.path.basename(p).lower().startswith(entreprise_lower)
-    ]
+
+    matches = []
+
+    for p in all_pdfs:
+        filename_only = os.path.basename(p)
+        source_name = filename_only.split("-")[0].strip().lower()
+
+        if source_name == entreprise_lower:
+            matches.append(p)
 
     if matches:
         return matches[0]
@@ -34,7 +35,6 @@ def extract_pdf_path(filename):
             "\n".join([os.path.basename(p) for p in all_pdfs])
         )
 
-
    
 
 def extract_pages(filename):
@@ -88,6 +88,7 @@ def extract_pages(filename):
             pages = [int(p.strip()) for p in str(val).split(",")]
             sections[section] = pages 
         else:
+            print(f"Aucune page trouvée pour la section {section} ")
             sections[section] = []
 
     return sections

+ 3 - 2
03 - Scripts/main.py

@@ -1,6 +1,6 @@
 INPUTS = {
-    "PREDICA_2025.pdf": {
-        "S.22": []
+    "Allianz Vie_2025.pdf": {
+        "S.05": []
     }
 }
 
@@ -9,6 +9,7 @@ from function_exctract import extract_pages, extract_pdf_path
 from dotenv import load_dotenv
 from langfuse.langchain import CallbackHandler
 
+
 # Initialize Langfuse CallbackHandler for Langchain (tracing)
 langfuse_handler = CallbackHandler()
 

+ 355 - 0
03 - Scripts/mapping.json

@@ -0,0 +1,355 @@
+{
+  "S.02": {
+    "rows": {
+      "Immobilisations incorporelles": "R0030",
+      "Actifs d'impôts différés": "R0040",
+      "Excédent du régime de retraite": "R0050",
+      "Immobilisations corporelles usage propre": "R0060",
+      "Investissements": "R0070",
+      "Biens immobiliers": "R0080",
+      "Détentions entreprises liées": "R0090",
+      "Actions": "R0100",
+      "Actions - cotées": "R0110",
+      "Actions - non cotées": "R0120",
+      "Obligations": "R0130",
+      "Obligations d'État": "R0140",
+      "Obligations d'entreprise": "R0150",
+      "Titres structurés": "R0160",
+      "Titres garantis": "R0170",
+      "OPC": "R0180",
+      "Produits dérivés": "R0190",
+      "Dépôts hors équivalents trésorerie": "R0200",
+      "Autres investissements": "R0210",
+      "Actifs UC et indexés": "R0220",
+      "Prêts et prêts hypothécaires": "R0230",
+      "Avances sur police": "R0240",
+      "Prêts particuliers": "R0250",
+      "Autres prêts": "R0260",
+      "Montants récupérables réassurance": "R0270",
+      "Non-vie et santé sim. non-vie": "R0280",
+      "Non-vie hors santé": "R0290",
+      "Santé sim. non-vie": "R0300",
+      "Vie sim. vie, hors santé, UC": "R0310",
+      "Santé sim. vie": "R0320",
+      "Vie hors santé, UC": "R0330",
+      "Vie UC": "R0340",
+      "Dépôts auprès des cédantes": "R0350",
+      "Créances assurance": "R0360",
+      "Créances réassurance": "R0370",
+      "Autres créances": "R0380",
+      "Actions propres auto-détenues": "R0390",
+      "Fonds propres appelés non payés": "R0400",
+      "Trésorerie": "R0410",
+      "Autres actifs": "R0420",
+      "Total de l'actif": "R0500",
+      "PT non-vie": "R0510",
+      "PT non-vie (hors santé)": "R0520",
+      "PT calculées comme un tout_1": "R0530",
+      "Meilleure estimation_1": "R0540",
+      "Marge de risque_1": "R0550",
+      "PT santé (sim. non-vie)": "R0560",
+      "PT calculées comme un tout_2": "R0570",
+      "Meilleure estimation_2": "R0580",
+      "Marge de risque_2": "R0590",
+      "PT vie (hors UC et indexés)": "R0600",
+      "PT santé (sim. vie)": "R0610",
+      "PT calculées comme un tout_3": "R0620",
+      "Meilleure estimation_3": "R0630",
+      "Marge de risque_3": "R0640",
+      "PT vie (hors santé, UC)": "R0650",
+      "PT calculées comme un tout_4": "R0660",
+      "Meilleure estimation_4": "R0670",
+      "Marge de risque_4": "R0680",
+      "PT UC et indexés": "R0690",
+      "PT calculées comme un tout_5": "R0700",
+      "Meilleure estimation_5": "R0710",
+      "Marge de risque_5": "R0720",
+      "Passifs éventuels": "R0740",
+      "Provisions hors PT": "R0750",
+      "Provisions pour retraite": "R0760",
+      "Dépôts des réassureurs": "R0770",
+      "Passifs d'impôts différés": "R0780",
+      "Produits dérivés_passif": "R0790",
+      "Dettes établissements crédit": "R0800",
+      "Dettes financières autres": "R0810",
+      "Dettes assurance": "R0820",
+      "Dettes réassurance": "R0830",
+      "Autres dettes": "R0840",
+      "Passifs subordonnés": "R0850",
+      "Passifs subordonnés hors FPB": "R0860",
+      "Passifs subordonnés inclus FPB": "R0870",
+      "Autres dettes non mentionnées": "R0880",
+      "Total du passif": "R0900",
+      "Excédent d'actif sur passif": "R1000"
+    },
+    "cols": {
+      "Valeur Solvabilité II": "C0010"
+    }
+  } ,
+ 
+
+
+
+  "S.05.01": {
+    "rows": {
+      "Primes émises : Brut - assurance directe": "R0110",
+      "Primes émises : Brut - Réassurance proportionnelle acceptée": "R0120",
+      "Primes émises : Brut - Réassurance non proportionnelle acceptée": "R0130",
+      "Primes émises : Part des réassureurs": "R0140",
+      "Primes émises : Net": "R0200",
+      
+      "Primes acquises : Brut - assurance directe": "R0210",
+      "Primes acquises : Brut - Réassurance proportionnelle acceptée": "R0220",
+      "Primes acquises : Brut - Réassurance non proportionnelle acceptée": "R0230",
+      "Primes acquises : Part des réassureurs": "R0240",
+      "Primes acquises : Net": "R0300",
+      
+      "Charge des sinistres : Brut - assurance directe": "R0310",
+      "Charge des sinistres : Brut - Réassurance proportionnelle acceptée": "R0320",
+      "Charge des sinistres : Brut - Réassurance non proportionnelle acceptée": "R0330",
+      "Charge des sinistres : Part des réassureurs": "R0340",
+      "Charge des sinistres : Net": "R0400",
+      
+      "Variation provisions : Brut - assurance directe": "R0410",
+      "Variation provisions : Brut - Réassurance proportionnelle acceptée": "R0420",
+      "Variation provisions : Brut - Réassurance non proportionnelle acceptée": "R0430",
+      "Variation provisions : Part des réassureurs": "R0440",
+      "Variation provisions : Net": "R0500",
+      
+      "Frais encourus": "R0550",
+      "Autres dépenses": "R1200",
+      "Balance - Autres revenus/charges techniques": "R1210",
+      "Total des dépenses (Non-Vie)": "R1300",
+      
+      "Primes émises Vie : Brut": "R1410",
+      "Primes émises Vie : Part des réassureurs": "R1420",
+      "Primes émises Vie : Net": "R1500",
+      
+      "Primes acquises Vie : Brut": "R1510",
+      "Primes acquises Vie : Part des réassureurs": "R1520",
+      "Primes acquises Vie : Net": "R1600",
+      
+      "Charge des sinistres Vie : Brut": "R1610",
+      "Charge des sinistres Vie : Part des réassureurs": "R1620",
+      "Charge des sinistres Vie : Net": "R1700",
+      
+      "Variation provisions Vie : Brut": "R1710",
+      "Variation provisions Vie : Part des réassureurs": "R1720",
+      "Variation provisions Vie : Net": "R1800",
+      
+      "Dépenses engagées Vie": "R1900",
+      "Autres dépenses Vie": "R2500",
+      "Solde – Autres dépenses/recettes techniques": "R2510",
+      "Total des dépenses (Vie)": "R2600",
+      "Dépenses engagées (Dernière ligne)": "R2700"
+    }
+  } ,
+
+
+"S.12": {
+    "rows": {
+      "PT calculées comme un tout": "R0010",
+      "Montants récupérables réass tout": "R0020",
+      "Meilleure estimation brute": "R0030",
+      "Montants récupérables réass BE": "R0080",
+      "Meilleure estimation nette": "R0090",
+      "Marge de risque": "R0100",
+      "Provisions techniques - Total": "R0200"
+    },
+    "cols": {
+      "Part aux bénéfices": "C0020",
+      "Indexée/UC - Sans options": "C0030",
+      "Indexée/UC - Avec options": "C0050",
+      "Autres Vie - Sans options": "C0070",
+      "Autres Vie - Avec options": "C0080",
+      "Rentes PT Non-Vie ex-Santé": "C0090",
+      "Réassurance acceptée": "C0100",
+      "Total Vie hors Santé": "C0150",
+      "Santé - Sans options": "C0170",
+      "Santé - Avec options": "C0180",
+      "Rentes PT Non-Vie liés Santé": "C0190",
+      "Réassurance Santé acceptée": "C0200",
+      "Total Santé sim. vie": "C0210"
+    }
+  } ,
+
+
+
+  "S.22": {
+    "rows": {
+      "Provisions techniques": "R0010",
+      "Fonds propres de base": "R0020",
+      "Fonds propres éligibles SCR": "R0050",
+      "SCR": "R0090",
+      "Fonds propres éligibles MCR": "R0100",
+      "MCR": "R0110"
+    },
+    "cols": {
+      "Montant avec mesures": "C0010",
+      "Impact transitoires PT": "C0030",
+      "Impact transitoires taux": "C0050",
+      "Impact correction volatilité zéro": "C0070",
+      "Impact ajustement égalisateur zéro": "C0090"
+    }
+  } ,
+
+
+  "S.23": {
+    "rows": {
+      "Capital actions ordinaires": "R0010",
+      "Primes émission actions ordinaires": "R0030",
+      "Fonds initial, cotisations mutuelles": "R0040",
+      "Comptes mutualistes subordonnés": "R0050",
+      "Fonds excédentaires": "R0070",
+      "Actions de préférence": "R0090",
+      "Primes émission actions préférence": "R0110",
+      "Réserve de réconciliation_1": "R0130",
+      "Passifs subordonnés": "R0140",
+      "Actifs impôts différés nets": "R0160",
+      "Autres éléments FPB approuvés": "R0180",
+      "FP non inclus dans réconciliation": "R0220",
+      "Déductions participations financières": "R0230",
+      "Total FPB après déductions": "R0290",
+      "Capital actions non libéré": "R0300",
+      "Fonds initial non libéré": "R0310",
+      "Actions préférence non libérées": "R0320",
+      "Engagements contraignants souscription": "R0330",
+      "Lettres de crédit art 96-2": "R0340",
+      "Lettres de crédit hors art 96-2": "R0350",
+      "Rappels cotisations art 96-3": "R0360",
+      "Rappels cotisations hors art 96-3": "R0370",
+      "Autres fonds propres auxiliaires": "R0390",
+      "Total fonds propres auxiliaires": "R0400",
+      "Total FP disponibles pour SCR": "R0500",
+      "Total FP disponibles pour MCR": "R0510",
+      "Total FP éligibles pour SCR": "R0540",
+      "Total FP éligibles pour MCR": "R0550",
+      "Capital de solvabilité requis": "R0580",
+      "Minimum de capital requis": "R0600",
+      "Ratio FP éligibles / SCR": "R0620",
+      "Ratio FP éligibles / MCR": "R0640",
+      "Excédent d'actif sur passif": "R0700",
+      "Actions propres détenues": "R0710",
+      "Dividendes et charges prévisibles": "R0720",
+      "Autres éléments de FPB": "R0730",
+      "Ajustement éléments restreints": "R0740",
+      "Réserve de réconciliation_2": "R0760",
+      "EPIFP - activités vie": "R0770",
+      "EPIFP - activités non-vie": "R0780",
+      "Total EPIFP": "R0790"
+    },
+    "cols": {
+      "Total": "C0010",
+      "Niveau 1 - non restreint": "C0020",
+      "Niveau 1 - restreint": "C0030",
+      "Niveau 2": "C0040",
+      "Niveau 3": "C0050",
+      "Réconciliation unique": "C0060"
+    }
+  }
+,
+
+
+  "S.25": {
+    "rows": {
+      "Risque de marché": "R0010",
+      "Risque de défaut contrepartie": "R0020",
+      "Risque souscription vie": "R0030",
+      "Risque souscription santé": "R0040",
+      "Risque souscription non-vie": "R0050",
+      "Diversification": "R0060",
+      "Risque immobilisations incorporelles": "R0070",
+      "Capital solvabilité requis de base": "R0100",
+      "Risque opérationnel": "R0130",
+      "Capacité absorption pertes PT": "R0140",
+      "Capacité absorption pertes impôts": "R0150",
+      "Capital requis activités directive 2003/41/CE": "R0160",
+      "SCR excluant exigences supplémentaires": "R0200",
+      "Exigences capital supplémentaire définies": "R0210",
+      "Exigences Art 37-1-a": "R0211",
+      "Exigences Art 37-1-b": "R0212",
+      "Exigences Art 37-1-c": "R0213",
+      "Exigences Art 37-1-d": "R0214",
+      "Capital de solvabilité requis": "R0220",
+      "SCR sous-module actions durée": "R0400",
+      "SCR notionnel part restante": "R0410",
+      "SCR notionnel fonds cantonnés": "R0420",
+      "SCR notionnel portefeuilles ajustement": "R0430",
+      "Diversification agrégation nSCR FC": "R0440",
+      "Approche taux imposition moyen": "R0590",
+      "LAC DT": "R0640",
+      "LAC DT reprise passifs impôts": "R0650",
+      "LAC DT bénéfices futurs": "R0660",
+      "LAC DT report arrière cours": "R0670",
+      "LAC DT report arrière futurs": "R0680",
+      "LAC DT maximale": "R0690"
+    },
+    "cols": {
+      "SCR brut": "C0010",
+      "Simplifications": "C0120",
+      "PPE": "C0090",
+      "Calcul SCR": "C0100",
+      "Taux imposition": "C0109",
+      "Calcul LAC DT": "C0130"
+    }
+  }
+,
+  "S.28": {
+    "rows": {
+      "Formule linéaire engagements non-vie": "R0010",
+      "Frais médicaux et réass proportionnelle": "R0020",
+      "Protection du revenu et réass proportionnelle": "R0030",
+      "Indemnisation travailleurs et réass proportionnelle": "R0040",
+      "RC automobile et réass proportionnelle": "R0050",
+      "Autres véhicules moteur et réass proportionnelle": "R0060",
+      "Maritime, aérien, transport et réass proportionnelle": "R0070",
+      "Incendie, autres dommages et réass proportionnelle": "R0080",
+      "RC générale et réass proportionnelle": "R0090",
+      "Crédit, cautionnement et réass proportionnelle": "R0100",
+      "Protection juridique et réass proportionnelle": "R0110",
+      "Assistance et réass proportionnelle": "R0120",
+      "Pertes pécuniaires diverses et réass proportionnelle": "R0130",
+      "Réassurance santé non proportionnelle": "R0140",
+      "Réassurance accidents non proportionnelle": "R0150",
+      "Réassurance maritime, aérienne, transport non prop": "R0160",
+      "Réassurance dommages non proportionnelle": "R0170",
+      "Formule linéaire engagements vie": "R0200",
+      "Part bénéfices - Prestations garanties": "R0210",
+      "Part bénéfices - Prestations discrétionnaires": "R0220",
+      "Prestations indexées et UC": "R0230",
+      "Autres engagements vie ou santé": "R0240",
+      "Capital sous risque net engagements vie": "R0250",
+      "MCR linéaire": "R0300",
+      "Capital de solvabilité requis": "R0310",
+      "Plafond du MCR": "R0320",
+      "Plancher du MCR": "R0330",
+      "MCR combiné": "R0340",
+      "Seuil plancher absolu du MCR": "R0360",
+      "Minimum de capital requis": "R0400",
+      "Montant notionnel du MCR linéaire": "R0500",
+      "Montant notionnel du SCR hors supp": "R0510",
+      "Plafond du montant notionnel du MCR": "R0520",
+      "Plancher du montant notionnel du MCR": "R0530",
+      "Montant notionnel du MCR combiné": "R0540",
+      "Seuil plancher absolu notionnel MCR": "R0550",
+      "Montant notionnel du MCR": "R0560"
+    },
+    "cols": {
+      "MCR NV Résultat non-vie": "C0010",
+      "MCR NV Résultat vie": "C0020",
+      "NV BE et PT nettes": "C0030",
+      "NV Primes émises 12m nettes": "C0040",
+      "V BE et PT nettes": "C0050",
+      "V Primes émises 12m nettes": "C0060",
+      "MCR V Résultat non-vie": "C0070",
+      "MCR V Résultat vie": "C0080",
+      "Engagements NV BE nettes": "C0090",
+      "Engagements NV Capital sous risque": "C0100",
+      "Engagements V BE nettes": "C0110",
+      "Engagements V Capital sous risque": "C0120",
+      "MCR Global unique": "C0130",
+      "Notionnel Non-Vie": "C0140",
+      "Notionnel Vie": "C0150"
+    }
+  }
+}

+ 15 - 20
03 - Scripts/server.py

@@ -230,26 +230,21 @@ load_dotenv()
 model_llama = ChatGroq(model="llama-3.3-70b-versatile") 
 
 llm = ChatGoogleGenerativeAI(model="gemini-2.0-flash", temperature=0.2)
-
-SYSTEM_PROMPT = """Tu es un assistant spécialisé dans l'extraction de données SFCR (Solvabilité II).
-
-Ton rôle : analyser la demande de l'utilisateur et extraire les informations nécessaires pour lancer une extraction :
-- entreprises (ex: CNP Assurances, AXA France Vie...)
-- année (ex: 2023, 2024...)
-- sections SFCR (parmi : S.02, S.05, S.12, S.17, S.22, S.25, S.28)
-
-Règles :
-- Si des informations manquent, pose UNE seule question à la fois.
-- Quand tu as TOUT (entreprises + année + sections), réponds UNIQUEMENT avec un JSON :
-{
-  "ready": true,
-  "entreprises": ["CNP Assurances", "AXA France Vie"],
-  "année": 2024,
-  "sections": ["S.02", "S.25"]
-}
-- Sinon réponds normalement en français.
-- Ne demande jamais plus d'une information à la fois.
-"""
+SYSTEM_PROMPT = """Tu es un assistant d'extraction SFCR. Sois CONCIS et DIRECT.
+
+Informations à collecter :
+1. Entreprise(s) — parmi : Abeille Epargne Retraite, Abeille Vie, ACM Vie SA, ACM Vie SAM, Allianz Vie, ANTARIUS, AXA Assurances Vie Mutuelle, AXA France Vie, BPCE VIE, Caisse Générale de Prévoyance (CGP), Cardif Assurance Vie, CNP Assurances, Generali Vie, GMF Vie, Groupama Gan Vie, HSBC Assurances Vie, La France Mutualiste, MAAF Vie, MACSF Epargne Retraite, MAIF Vie, MMA Vie SA, Prépar Vie, SMA Vie BTP, SOGECAP, SPIRICA, SURAVENIR SA, SwissLife Assurance et Patrimoine
+2. Année — ex: 2021, 2022, 2023, 2024, 2025
+3. Sections — parmi : S.02, S.05, S.12, S.17, S.22, S.25, S.28
+
+Règles STRICTES :
+- Si l'utilisateur écrit un nom approchant (ex: "CNP", "SNCP"), déduis l'entreprise la plus proche de la liste.
+- Si l'utilisateur écrit "S0.2" ou "s02", c'est S.02. Sois tolérant sur la syntaxe.
+- Pose UNE seule question courte si une info manque.
+- Quand tu as tout, retourne UNIQUEMENT ce JSON sans aucun texte :
+{"ready": true, "entreprises": ["CNP Assurances"], "année": 2025, "sections": ["S.02"]}
+- Réponses courtes, max 1-2 phrases.
+- Ne demande jamais de confirmation si tu as déjà l'info."""
 
 @app.route("/chat-agent", methods=["POST"])
 def chat_agent():

+ 257 - 376
03 - Scripts/sfcr-app/src/App.jsx

@@ -1,7 +1,6 @@
 import { useState, useMemo, useRef, useEffect } from "react";
 import { ALL_COMPANIES, SFCR_SECTIONS, YEARS, CURRENT_YEAR, sectionLabels } from "./constants";
-import { styles, cssString } from "./styles";
-import { C } from "./styles";
+import { styles, cssString, C } from "./styles";
 
 // ─────────────────────────────────────────────
 // VUE EXTRACTION TEMPS RÉEL
@@ -15,7 +14,6 @@ const ExtractionStatusView = ({ progress, logs, onClose, isDone }) => {
 
   return (
     <div style={styles.extractionWrapper}>
-      {/* Header */}
       <div style={styles.extractionHeader}>
         <div>
           <h2 style={styles.pageTitle}>Extraction en cours</h2>
@@ -28,23 +26,17 @@ const ExtractionStatusView = ({ progress, logs, onClose, isDone }) => {
         </button>
       </div>
 
-      {/* Cartes entreprises */}
       <div style={styles.extractionList}>
         {Object.entries(progress).map(([company, companyData]) => {
           const sections = Object.entries(companyData.sections || {});
           const allDone = sections.length > 0 && sections.every(([, s]) => s.status !== "pending" && s.status !== "running");
           const hasError = sections.some(([, s]) => s.status === "error");
 
-            return (
-              <div key={company} style={{
-                ...styles.extractionCard,
-                // La bordure devient rouge vif si hasError est vrai
-                borderColor: allDone 
-                  ? (hasError ? "#ef4444" : "rgba(16,185,129,0.35)") 
-                  : C.border,
-                borderWidth: hasError ? 1 : 1, 
-              }}>   
-              {/* Company header */}
+          return (
+            <div key={company} style={{
+              ...styles.extractionCard,
+              borderColor: allDone ? (hasError ? "#ef4444" : "rgba(16,185,129,0.35)") : C.border,
+            }}>
               <div style={styles.companyStatusHeader}>
                 <div style={{
                   ...styles.companyAvatar,
@@ -59,12 +51,9 @@ const ExtractionStatusView = ({ progress, logs, onClose, isDone }) => {
                   <span style={{ fontSize: 11, color: allDone ? (hasError ? "#ef4444" : C.green) : C.accent }}>
                     {allDone
                       ? hasError ? "⚠ Terminé avec des erreurs" : "✓ Terminé avec succès"
-                      : companyData.currentSection
-                        ? `Traitement ${companyData.currentSection}...`
-                        : "En attente..."}
+                      : companyData.currentSection ? `Traitement ${companyData.currentSection}...` : "En attente..."}
                   </span>
                 </div>
-                {/* Progress pill */}
                 <span style={{
                   fontSize: 11, fontWeight: 700, padding: "3px 10px", borderRadius: 20,
                   background: allDone ? (hasError ? "rgba(239,68,68,0.1)" : C.greenSoft) : C.accentSoft,
@@ -74,50 +63,30 @@ const ExtractionStatusView = ({ progress, logs, onClose, isDone }) => {
                 </span>
               </div>
 
-              {/* Sections timeline */}
               <div style={{ display: "flex", gap: 8, flexWrap: "wrap", padding: "0 4px" }}>
                 {sections.map(([key, sec]) => (
                   <div key={key} style={{
                     display: "flex", flexDirection: "column", alignItems: "center", gap: 5,
                     padding: "10px 14px", borderRadius: 8, fontSize: 11, fontWeight: 600,
                     border: `1px solid`,
-                    borderColor: sec.status === "success"
-                      ? "rgba(16,185,129,0.4)"
-                      : sec.status === "error"
-                        ? "rgba(239,68,68,0.4)"
-                        : sec.status === "running"
-                          ? "rgba(59,130,246,0.4)"
-                          : C.border,
-                    background: sec.status === "success"
-                      ? "rgba(16,185,129,0.08)"
-                      : sec.status === "error"
-                        ? "rgba(239,68,68,0.08)"
-                        : sec.status === "running"
-                          ? C.accentSoft
-                          : C.bg,
-                    color: sec.status === "success"
-                      ? C.green
-                      : sec.status === "error"
-                        ? "#ef4444"
-                        : sec.status === "running"
-                          ? C.accent
-                          : C.textMuted,
+                    borderColor: sec.status === "success" ? "rgba(16,185,129,0.4)"
+                      : sec.status === "error" ? "rgba(239,68,68,0.4)"
+                      : sec.status === "running" ? "rgba(59,130,246,0.4)" : C.border,
+                    background: sec.status === "success" ? "rgba(16,185,129,0.08)"
+                      : sec.status === "error" ? "rgba(239,68,68,0.08)"
+                      : sec.status === "running" ? C.accentSoft : C.bg,
+                    color: sec.status === "success" ? C.green
+                      : sec.status === "error" ? "#ef4444"
+                      : sec.status === "running" ? C.accent : C.textMuted,
                     minWidth: 70,
                   }}>
                     <span style={{ fontSize: 16 }}>
-                      {sec.status === "success" ? "✅"
-                        : sec.status === "error" ? "❌"
-                        : sec.status === "running" ? "⚙️"
-                        : "⏳"}
+                      {sec.status === "success" ? "✅" : sec.status === "error" ? "❌" : sec.status === "running" ? "⚙️" : "⏳"}
                     </span>
                     <span>{sec.section}</span>
                     {sec.page && <span style={{ fontSize: 10, opacity: 0.7 }}>p.{sec.page}</span>}
                     {sec.status === "error" && sec.error && (
-                      <span style={{
-                        fontSize: 9, color: "#ef4444", maxWidth: 120,
-                        textAlign: "center", lineHeight: 1.3, marginTop: 2,
-                        wordBreak: "break-word",
-                      }}>
+                      <span style={{ fontSize: 9, color: "#ef4444", maxWidth: 120, textAlign: "center", lineHeight: 1.3, marginTop: 2, wordBreak: "break-word" }}>
                         {sec.error.slice(0, 60)}{sec.error.length > 60 ? "…" : ""}
                       </span>
                     )}
@@ -129,51 +98,23 @@ const ExtractionStatusView = ({ progress, logs, onClose, isDone }) => {
         })}
 
         {Object.keys(progress).length === 0 && (
-          <div style={{ color: C.textMuted, textAlign: "center", padding: 40 }}>
-            ⏳ Connexion au serveur...
-          </div>
+          <div style={{ color: C.textMuted, textAlign: "center", padding: 40 }}>⏳ Connexion au serveur...</div>
         )}
       </div>
 
-      {/* Terminal logs */}
-      <div style={{
-        marginTop: 24,
-        background: "#0a0c10",
-        border: `1px solid ${C.border}`,
-        borderRadius: 10,
-        overflow: "hidden",
-      }}>
-        <div style={{
-          padding: "8px 14px",
-          borderBottom: `1px solid ${C.border}`,
-          fontSize: 11, fontWeight: 700,
-          color: C.textMuted, letterSpacing: "1px",
-          textTransform: "uppercase",
-          display: "flex", alignItems: "center", gap: 8,
-        }}>
+      <div style={{ marginTop: 24, background: "#0a0c10", border: `1px solid ${C.border}`, borderRadius: 10, overflow: "hidden" }}>
+        <div style={{ padding: "8px 14px", borderBottom: `1px solid ${C.border}`, fontSize: 11, fontWeight: 700, color: C.textMuted, letterSpacing: "1px", textTransform: "uppercase", display: "flex", alignItems: "center", gap: 8 }}>
           <span style={{ color: "#ef4444" }}>●</span>
           <span style={{ color: "#f59e0b" }}>●</span>
           <span style={{ color: C.green }}>●</span>
           <span style={{ marginLeft: 8 }}>Terminal</span>
         </div>
-        <div style={{
-          height: 220,
-          overflowY: "auto",
-          padding: "12px 16px",
-          fontFamily: "'Fira Code', 'Courier New', monospace",
-          fontSize: 12,
-          lineHeight: 1.6,
-          color: "#a3e635",
-        }}>
+        <div style={{ height: 220, overflowY: "auto", padding: "12px 16px", fontFamily: "'Fira Code', 'Courier New', monospace", fontSize: 12, lineHeight: 1.6, color: "#a3e635" }}>
           {logs.map((line, i) => (
             <div key={i} style={{
-              color: line.includes("Erreur") || line.includes("Error") || line.includes("❌")
-                ? "#f87171"
-                : line.includes("✓") || line.includes("terminée") || line.includes("success")
-                  ? "#86efac"
-                  : line.includes("DÉMARRAGE") || line.includes("Traitement")
-                    ? "#93c5fd"
-                    : "#a3e635",
+              color: line.includes("Erreur") || line.includes("Error") || line.includes("❌") ? "#f87171"
+                : line.includes("✓") || line.includes("terminée") || line.includes("success") ? "#86efac"
+                : line.includes("DÉMARRAGE") || line.includes("Traitement") ? "#93c5fd" : "#a3e635",
             }}>
               <span style={{ opacity: 0.4, marginRight: 8 }}>&gt;</span>{line}
             </div>
@@ -185,6 +126,156 @@ const ExtractionStatusView = ({ progress, logs, onClose, isDone }) => {
   );
 };
 
+// ─────────────────────────────────────────────
+// COMPOSANT CHAT AGENT
+// ─────────────────────────────────────────────
+const ChatAgent = ({ onReady }) => {
+  const [messages, setMessages] = useState([
+    { role: "assistant", content: "Bonjour ! Dites-moi ce que vous souhaitez extraire.\n" }
+  ]);
+  const [input, setInput] = useState("");
+  const [loading, setLoading] = useState(false);
+  const messagesEndRef = useRef(null);
+
+  useEffect(() => {
+    messagesEndRef.current?.scrollIntoView({ behavior: "smooth" });
+  }, [messages]);
+
+  const send = async () => {
+    if (!input.trim() || loading) return;
+
+    const userMsg = { role: "user", content: input };
+    const newHistory = [...messages, userMsg];
+    setMessages(newHistory);
+    setInput("");
+    setLoading(true);
+
+    try {
+      const response = await fetch("http://localhost:5000/chat-agent", {
+        method: "POST",
+        headers: { "Content-Type": "application/json" },
+        body: JSON.stringify({ history: newHistory }),
+      });
+
+      const result = await response.json();
+      const assistantMsg = { role: "assistant", content: result.reply };
+      const updatedMessages = [...newHistory, assistantMsg];
+      setMessages(updatedMessages);
+
+      // Si l'agent a toutes les infos → remplit le formulaire
+      if (result.ready && result.data) {
+        const { entreprises, année, sections } = result.data;
+
+        // Petit délai pour que l'utilisateur voie le message
+        setTimeout(() => {
+          onReady({ entreprises, année, sections });
+        }, 800);
+
+        setMessages([...updatedMessages, {
+          role: "assistant",
+          content: `Formulaire rempli :\n• ${entreprises.length} entreprise(s)\n• Année : ${année}\n• Sections : ${sections.join(", ")}\n\nCliquez sur **Lancer l'extraction** pour démarrer.`
+        }]);
+      }
+
+    } catch (err) {
+      setMessages(prev => [...prev, { role: "assistant", content: "Erreur de connexion au serveur." }]);
+    } finally {
+      setLoading(false);
+    }
+  };
+
+  return (
+    <section style={styles.card}>
+      <div style={styles.cardHeader}>
+        <span style={{ ...styles.cardBadge, background: "rgba(197, 56, 21, 0.12)", color: "#e91313", border: "1px solid rgba(245,158,11,0.25)" }}>AI</span>
+        <h2 style={styles.cardTitle}>Assistant SFCR</h2>
+        <span style={{ fontSize: 10, color: C.textMuted, marginLeft: "auto" }}>Forsides</span>
+      </div>
+
+      {/* Messages */}
+      <div style={{ display: "flex", flexDirection: "column", gap: 8, maxHeight: 320, overflowY: "auto", marginBottom: 12, paddingRight: 4 }}>
+        {messages.map((msg, i) => (
+          <div key={i} style={{
+            padding: "9px 13px",
+            borderRadius: msg.role === "user" ? "10px 10px 2px 10px" : "10px 10px 10px 2px",
+            fontSize: 13,
+            lineHeight: 1.6,
+            maxWidth: "88%",
+            whiteSpace: "pre-wrap",
+            alignSelf: msg.role === "user" ? "flex-end" : "flex-start",
+            background: msg.role === "user" ? C.accentSoft : C.surfaceHover,
+            border: `1px solid ${msg.role === "user" ? "rgba(59,130,246,0.25)" : C.border}`,
+            color: msg.role === "user" ? C.text : C.textDim,
+          }}>
+            {msg.content}
+          </div>
+        ))}
+
+        {loading && (
+          <div style={{
+            padding: "9px 13px", borderRadius: "10px 10px 10px 2px",
+            background: C.surfaceHover, border: `1px solid ${C.border}`,
+            alignSelf: "flex-start", display: "flex", gap: 4, alignItems: "center",
+          }}>
+            {[0, 1, 2].map(i => (
+              <span key={i} style={{
+                width: 6, height: 6, borderRadius: "50%",
+                background: C.accent, opacity: 0.6,
+                animation: `pulse 1.2s ease-in-out ${i * 0.2}s infinite`,
+              }} />
+            ))}
+          </div>
+        )}
+        <div ref={messagesEndRef} />
+      </div>
+
+      {/* Input */}
+      <div style={{ display: "flex", gap: 8 }}>
+        <input
+          style={{
+            flex: 1,
+            background: C.bg,
+            border: `1px solid ${C.border}`,
+            borderRadius: 8,
+            padding: "9px 13px",
+            color: C.text,
+            fontSize: 13,
+            outline: "none",
+            fontFamily: "inherit",
+          }}
+          placeholder='Ex: "SFCR CNP et AXA 2024, S.02 et S.25"'
+          value={input}
+          onChange={(e) => setInput(e.target.value)}
+          onKeyDown={(e) => e.key === "Enter" && send()}
+        />
+        <button
+          onClick={send}
+          disabled={loading}
+          style={{
+            background: loading ? C.surfaceHover : C.accent,
+            border: "none",
+            borderRadius: 8,
+            color: "#fff",
+            padding: "9px 16px",
+            cursor: loading ? "not-allowed" : "pointer",
+            fontSize: 15,
+            transition: "all 0.15s",
+          }}
+        >
+          ➤
+        </button>
+      </div>
+
+      <style>{`
+        @keyframes pulse {
+          0%, 100% { transform: scale(1); opacity: 0.4; }
+          50% { transform: scale(1.3); opacity: 1; }
+        }
+      `}</style>
+    </section>
+  );
+};
+
 // ─────────────────────────────────────────────
 // COMPOSANT PRINCIPAL
 // ─────────────────────────────────────────────
@@ -203,9 +294,6 @@ export default function App() {
   const [sidebarCollapsed, setSidebarCollapsed] = useState(false);
   const [isExtracting, setIsExtracting] = useState(false);
   const [isDone, setIsDone] = useState(false);
-
-  // État temps réel
-  // progress: { [companyName]: { currentSection, sections: { [key]: { section, page, status, error } } } }
   const [progress, setProgress] = useState({});
   const [logs, setLogs] = useState([]);
 
@@ -234,10 +322,7 @@ export default function App() {
   const toggleCompanySection = (companyId, s) =>
     setSelectedSections((prev) => {
       const cur = prev[companyId] || [];
-      return {
-        ...prev,
-        [companyId]: cur.includes(s) ? cur.filter((x) => x !== s) : [...cur, s],
-      };
+      return { ...prev, [companyId]: cur.includes(s) ? cur.filter((x) => x !== s) : [...cur, s] };
     });
 
   const setCompanyYear = (companyId, year) =>
@@ -248,6 +333,19 @@ export default function App() {
     [selectedCompanies]
   );
 
+  // ── Chat agent callback (passed to ChatAgent as onReady): fills in the extraction form automatically
+  const handleChatReady = ({ entreprises, année, sections }) => {
+    // Keep every ALL_COMPANIES entry whose name contains one of the requested names (case-insensitive substring match). NOTE(review): an empty requested name matches every company, and "SURAVENIR" also matches "SURAVENIR SA".
+    const matched = ALL_COMPANIES.filter(c =>
+      entreprises.some(e => c.name.toLowerCase().includes(e.toLowerCase().trim()))
+    );
+    setSelectedCompanies(matched.map(c => c.id));
+    setGlobalYear(année);
+    setYearMode("global"); // the single year above applies to all selected companies
+    setGlobalSections(sections);
+    setSectionMode("global"); // the section list above applies to all selected companies
+  };
+
   const handleExtraction = async () => {
     if (selectedCompanies.length === 0) {
       alert("Veuillez sélectionner au moins une entreprise.");
@@ -256,31 +354,21 @@ export default function App() {
 
     const inputs = {};
     selectedCompanyObjects.forEach((company) => {
-      const sections =
-        sectionMode === "global"
-          ? globalSections
-          : selectedSections[company.id] || [];
-      const year =
-        yearMode === "global"
-          ? globalYear
-          : selectedYears[company.id] || CURRENT_YEAR;
+      const sections = sectionMode === "global" ? globalSections : selectedSections[company.id] || [];
+      const year = yearMode === "global" ? globalYear : selectedYears[company.id] || CURRENT_YEAR;
 
       if (sections.length > 0) {
         const filename = `${company.name}_${year}.pdf`;
         inputs[filename] = {};
-        sections.forEach((s) => {
-          inputs[filename][s] = [];
-        });
+        sections.forEach((s) => { inputs[filename][s] = []; });
       }
     });
 
-    // Reset et affichage immédiat
     setProgress({});
     setLogs([]);
     setIsDone(false);
     setIsExtracting(true);
 
-    // SSE streaming
     try {
       const response = await fetch("http://localhost:5000/run-extraction-stream", {
         method: "POST",
@@ -295,17 +383,12 @@ export default function App() {
       while (true) {
         const { done, value } = await reader.read();
         if (done) break;
-
         buffer += decoder.decode(value, { stream: true });
         const lines = buffer.split("\n");
-        buffer = lines.pop(); // Garde l'éventuel fragment incomplet
-
+        buffer = lines.pop();
         for (const line of lines) {
           if (!line.startsWith("data: ")) continue;
-          try {
-            const event = JSON.parse(line.slice(6));
-            handleSSEEvent(event);
-          } catch (_) {}
+          try { handleSSEEvent(JSON.parse(line.slice(6))); } catch (_) {}
         }
       }
     } catch (err) {
@@ -319,65 +402,31 @@ export default function App() {
       case "log":
         setLogs((prev) => [...prev, event.line]);
         break;
-
       case "company_start":
-        setProgress((prev) => ({
-          ...prev,
-          [event.company]: { currentSection: null, sections: {} },
-        }));
+        setProgress((prev) => ({ ...prev, [event.company]: { currentSection: null, sections: {} } }));
         break;
-
       case "section_start":
         setProgress((prev) => {
           const key = `${event.section}_p${event.page}`;
           const comp = prev[event.company] || { sections: {} };
-          return {
-            ...prev,
-            [event.company]: {
-              ...comp,
-              currentSection: event.section,
-              sections: {
-                ...comp.sections,
-                [key]: { section: event.section, page: event.page, status: "running" },
-              },
-            },
-          };
+          return { ...prev, [event.company]: { ...comp, currentSection: event.section, sections: { ...comp.sections, [key]: { section: event.section, page: event.page, status: "running" } } } };
         });
         break;
-
       case "section_done":
         setProgress((prev) => {
           const key = `${event.section}_p${event.page}`;
           const comp = prev[event.company] || { sections: {} };
-          return {
-            ...prev,
-            [event.company]: {
-              ...comp,
-              currentSection: event.status === "success" ? null : comp.currentSection,
-              sections: {
-                ...comp.sections,
-                [key]: {
-                  section: event.section,
-                  page: event.page,
-                  status: event.status,
-                  error: event.error || null,
-                },
-              },
-            },
-          };
+          return { ...prev, [event.company]: { ...comp, currentSection: event.status === "success" ? null : comp.currentSection, sections: { ...comp.sections, [key]: { section: event.section, page: event.page, status: event.status, error: event.error || null } } } };
         });
         break;
-
       case "done":
         setLogs((prev) => [...prev, `\n${event.message}`]);
         setIsDone(true);
         break;
-
       case "error":
         setLogs((prev) => [...prev, `❌ ${event.message}`]);
         setIsDone(true);
         break;
-
       default:
         break;
     }
@@ -396,10 +445,7 @@ export default function App() {
               <span style={styles.brandSub}>·Dataltist</span>
             </span>
           )}
-          <button
-            onClick={() => setSidebarCollapsed((v) => !v)}
-            style={styles.collapseBtn}
-          >
+          <button onClick={() => setSidebarCollapsed((v) => !v)} style={styles.collapseBtn}>
             {sidebarCollapsed ? "›" : "‹"}
           </button>
         </div>
@@ -412,30 +458,18 @@ export default function App() {
           ].map((item) => (
             <button
               key={item.id}
-              onClick={() => {
-                setActiveNav(item.id);
-                if (item.id === "search") setIsExtracting(false);
-                if (item.id === "analyse") setIsExtracting(false);
-              }}
-              style={{
-                ...styles.navBtn,
-                ...(activeNav === item.id ? styles.navBtnActive : {}),
-              }}
+              onClick={() => { setActiveNav(item.id); if (item.id !== "extraction") setIsExtracting(false); }}
+              style={{ ...styles.navBtn, ...(activeNav === item.id ? styles.navBtnActive : {}) }}
               className="nav-btn"
             >
               <span style={styles.navIcon}>{item.icon}</span>
-              {!sidebarCollapsed && (
-                <span style={styles.navLabel}>{item.label}</span>
-              )}
+              {!sidebarCollapsed && <span style={styles.navLabel}>{item.label}</span>}
             </button>
           ))}
         </nav>
 
         <div style={styles.sidebarFooter}>
-          {!sidebarCollapsed && (
-            <span style={styles.footerBadge}>Forsides</span>
-            
-          )}
+          {!sidebarCollapsed && <span style={styles.footerBadge}>Forsides</span>}
         </div>
       </aside>
 
@@ -445,96 +479,52 @@ export default function App() {
           <div style={styles.placeholderPanel}>
             <div style={styles.placeholderIcon}>⊕</div>
             <h2 style={styles.placeholderTitle}>Search Sources</h2>
-            <p style={styles.placeholderDesc}>
-              Section dédiée à la recherche de sources SFCR.
-            </p>
+            <p style={styles.placeholderDesc}>Section dédiée à la recherche de sources SFCR.</p>
           </div>
         ) : activeNav === "analyse" ? (
-            <div style={styles.placeholderPanel}>
-              <div style={styles.placeholderIcon}>⊗</div>
-              <h2 style={styles.placeholderTitle}>Analyse QRT's</h2>
-              <p style={styles.placeholderDesc}>
-                Section dédiée à l'analyse des QRT.
-              </p>
-            </div>
-          ): isExtracting ? (
-          <ExtractionStatusView
-            progress={progress}
-            logs={logs}
-            isDone={isDone}
-            onClose={() => setIsExtracting(false)}
-          />
+          <div style={styles.placeholderPanel}>
+            <div style={styles.placeholderIcon}>⊗</div>
+            <h2 style={styles.placeholderTitle}>Analyse QRT's</h2>
+            <p style={styles.placeholderDesc}>Section dédiée à l'analyse des QRT.</p>
+          </div>
+        ) : isExtracting ? (
+          <ExtractionStatusView progress={progress} logs={logs} isDone={isDone} onClose={() => setIsExtracting(false)} />
         ) : (
           <div style={styles.extractionPanel}>
+            {/* Header */}
             <div style={styles.pageHeader}>
               <div>
                 <h1 style={styles.pageTitle}>Extraction SFCR</h1>
-                <p style={styles.pageSubtitle}>
-                  Sélectionnez les entreprises et paramétrez l'extraction
-                </p>
+                <p style={styles.pageSubtitle}>Sélectionnez les entreprises et paramétrez l'extraction</p>
               </div>
-              <button
-                style={styles.extractBtn}
-                className="extract-btn"
-                onClick={handleExtraction}
-              >
+              <button style={styles.extractBtn} className="extract-btn" onClick={handleExtraction}>
                 <span>▶</span> Lancer l'extraction
               </button>
             </div>
 
             <div style={styles.twoCol}>
+              {/* COLONNE GAUCHE */}
               <div style={styles.leftCol}>
-                {/* 01. SELECTION ENTREPRISES */}
+                {/* 01. SÉLECTION ENTREPRISES */}
                 <section style={styles.card}>
                   <div style={styles.cardHeader}>
                     <span style={styles.cardBadge}>01</span>
                     <h2 style={styles.cardTitle}>Sélection des entreprises</h2>
                     {selectedCompanies.length > 0 && (
-                      <span style={styles.countPill}>
-                        {selectedCompanies.length} sélectionné
-                        {selectedCompanies.length > 1 ? "s" : ""}
-                      </span>
+                      <span style={styles.countPill}>{selectedCompanies.length} sélectionné{selectedCompanies.length > 1 ? "s" : ""}</span>
                     )}
                   </div>
 
                   <div style={styles.searchRow}>
                     <span style={styles.searchIcon}>🔍</span>
-                    <input
-                      style={styles.searchInput}
-                      placeholder="Rechercher une entreprise…"
-                      value={search}
-                      onChange={(e) => setSearch(e.target.value)}
-                    />
+                    <input style={styles.searchInput} placeholder="Rechercher une entreprise…" value={search} onChange={(e) => setSearch(e.target.value)} />
                   </div>
 
                   <div style={styles.selectAllRow}>
                     <label style={styles.checkLabel} className="check-label">
-                      <input
-                        type="checkbox"
-                        checked={
-                          filtered.length > 0 &&
-                          filtered.every((c) =>
-                            selectedCompanies.includes(c.id)
-                          )
-                        }
-                        onChange={toggleAll}
-                        style={styles.hiddenCheck}
-                      />
-                      <span
-                        style={{
-                          ...styles.customCheck,
-                          ...(filtered.length > 0 &&
-                          filtered.every((c) =>
-                            selectedCompanies.includes(c.id)
-                          )
-                            ? styles.customCheckChecked
-                            : {}),
-                        }}
-                      >
-                        {filtered.length > 0 &&
-                          filtered.every((c) =>
-                            selectedCompanies.includes(c.id)
-                          ) && "✓"}
+                      <input type="checkbox" checked={filtered.length > 0 && filtered.every((c) => selectedCompanies.includes(c.id))} onChange={toggleAll} style={styles.hiddenCheck} />
+                      <span style={{ ...styles.customCheck, ...(filtered.length > 0 && filtered.every((c) => selectedCompanies.includes(c.id)) ? styles.customCheckChecked : {}) }}>
+                        {filtered.length > 0 && filtered.every((c) => selectedCompanies.includes(c.id)) && "✓"}
                       </span>
                       <span style={styles.checkLabelText}>Tout sélectionner</span>
                     </label>
@@ -545,49 +535,25 @@ export default function App() {
                       {filtered.map((company) => {
                         const checked = selectedCompanies.includes(company.id);
                         return (
-                          <label
-                            key={company.id}
-                            style={{
-                              ...styles.companyRow,
-                              ...(checked ? styles.companyRowChecked : {}),
-                            }}
-                            className="company-row"
-                          >
-                            <input
-                              type="checkbox"
-                              checked={checked}
-                              onChange={() => toggleCompany(company.id)}
-                              style={styles.hiddenCheck}
-                            />
-                            <span style={styles.companyAvatar}>
-                              {company.logo}
-                            </span>
+                          <label key={company.id} style={{ ...styles.companyRow, ...(checked ? styles.companyRowChecked : {}) }} className="company-row">
+                            <input type="checkbox" checked={checked} onChange={() => toggleCompany(company.id)} style={styles.hiddenCheck} />
+                            <span style={styles.companyAvatar}>{company.logo}</span>
                             <span style={styles.companyName}>{company.name}</span>
-                            <span
-                              style={{
-                                ...styles.customCheck,
-                                ...(checked ? styles.customCheckChecked : {}),
-                              }}
-                            >
-                              {checked && "✓"}
-                            </span>
+                            <span style={{ ...styles.customCheck, ...(checked ? styles.customCheckChecked : {}) }}>{checked && "✓"}</span>
                           </label>
                         );
                       })}
                     </div>
                   </div>
 
-                  {!showAll && ALL_COMPANIES.length > 10 && (
-                    <button
-                      style={styles.showMoreBtn}
-                      onClick={() => setShowAll(true)}
-                    >
-                      Afficher plus ({ALL_COMPANIES.length - 10} entreprises)
+                  {!showAll && ALL_COMPANIES.length > 5 && (
+                    <button style={styles.showMoreBtn} onClick={() => setShowAll(true)}>
+                      Afficher plus ({ALL_COMPANIES.length - 5} entreprises)
                     </button>
                   )}
                 </section>
 
-                {/* 02. ANNEE */}
+                {/* 02. ANNÉE */}
                 {selectedCompanies.length > 0 && (
                   <section style={styles.card}>
                     <div style={styles.cardHeader}>
@@ -595,40 +561,16 @@ export default function App() {
                       <h2 style={styles.cardTitle}>Année de rapport</h2>
                     </div>
                     <div style={styles.modeToggle}>
-                      <button
-                        style={{
-                          ...styles.modeBtn,
-                          ...(yearMode === "global" ? styles.modeBtnActive : {}),
-                        }}
-                        onClick={() => setYearMode("global")}
-                      >
-                        Toutes
-                      </button>
-                      <button
-                        style={{
-                          ...styles.modeBtn,
-                          ...(yearMode === "individual"
-                            ? styles.modeBtnActive
-                            : {}),
-                        }}
-                        onClick={() => setYearMode("individual")}
-                      >
-                        Par entreprise
-                      </button>
+                      {["global", "individual"].map((m) => (
+                        <button key={m} style={{ ...styles.modeBtn, ...(yearMode === m ? styles.modeBtnActive : {}) }} onClick={() => setYearMode(m)}>
+                          {m === "global" ? "Toutes" : "Par entreprise"}
+                        </button>
+                      ))}
                     </div>
                     {yearMode === "global" ? (
                       <div style={styles.yearGrid}>
                         {YEARS.map((y) => (
-                          <button
-                            key={y}
-                            style={{
-                              ...styles.yearChip,
-                              ...(globalYear === y ? styles.yearChipActive : {}),
-                            }}
-                            onClick={() => setGlobalYear(y)}
-                          >
-                            {y}
-                          </button>
+                          <button key={y} style={{ ...styles.yearChip, ...(globalYear === y ? styles.yearChipActive : {}) }} onClick={() => setGlobalYear(y)}>{y}</button>
                         ))}
                       </div>
                     ) : (
@@ -638,18 +580,7 @@ export default function App() {
                             <span style={styles.perCompanyName}>{c.name}</span>
                             <div style={styles.miniYearRow}>
                               {YEARS.map((y) => (
-                                <button
-                                  key={y}
-                                  style={{
-                                    ...styles.miniYearChip,
-                                    ...((selectedYears[c.id] || CURRENT_YEAR) === y
-                                      ? styles.miniYearChipActive
-                                      : {}),
-                                  }}
-                                  onClick={() => setCompanyYear(c.id, y)}
-                                >
-                                  {y}
-                                </button>
+                                <button key={y} style={{ ...styles.miniYearChip, ...((selectedYears[c.id] || CURRENT_YEAR) === y ? styles.miniYearChipActive : {}) }} onClick={() => setCompanyYear(c.id, y)}>{y}</button>
                               ))}
                             </div>
                           </div>
@@ -660,6 +591,7 @@ export default function App() {
                 )}
               </div>
 
+              {/* COLONNE DROITE */}
               <div style={styles.rightCol}>
                 {/* 03. SECTIONS */}
                 {selectedCompanies.length > 0 && (
@@ -669,46 +601,18 @@ export default function App() {
                       <h2 style={styles.cardTitle}>Sections SFCR</h2>
                     </div>
                     <div style={styles.modeToggle}>
-                      <button
-                        style={{
-                          ...styles.modeBtn,
-                          ...(sectionMode === "global"
-                            ? styles.modeBtnActive
-                            : {}),
-                        }}
-                        onClick={() => setSectionMode("global")}
-                      >
-                        Toutes
-                      </button>
-                      <button
-                        style={{
-                          ...styles.modeBtn,
-                          ...(sectionMode === "individual"
-                            ? styles.modeBtnActive
-                            : {}),
-                        }}
-                        onClick={() => setSectionMode("individual")}
-                      >
-                        Par entreprise
-                      </button>
+                      {["global", "individual"].map((m) => (
+                        <button key={m} style={{ ...styles.modeBtn, ...(sectionMode === m ? styles.modeBtnActive : {}) }} onClick={() => setSectionMode(m)}>
+                          {m === "global" ? "Toutes" : "Par entreprise"}
+                        </button>
+                      ))}
                     </div>
                     {sectionMode === "global" ? (
                       <div style={styles.sectionGrid}>
                         {SFCR_SECTIONS.map((s) => (
-                          <button
-                            key={s}
-                            onClick={() => toggleGlobalSection(s)}
-                            style={{
-                              ...styles.sectionChip,
-                              ...(globalSections.includes(s)
-                                ? styles.sectionChipActive
-                                : {}),
-                            }}
-                          >
+                          <button key={s} onClick={() => toggleGlobalSection(s)} style={{ ...styles.sectionChip, ...(globalSections.includes(s) ? styles.sectionChipActive : {}) }}>
                             <span style={styles.sectionCode}>{s}</span>
-                            <span style={styles.sectionLabel}>
-                              {sectionLabels[s]}
-                            </span>
+                            <span style={styles.sectionLabel}>{sectionLabels[s]}</span>
                           </button>
                         ))}
                       </div>
@@ -721,18 +625,7 @@ export default function App() {
                             </div>
                             <div style={styles.miniSectionGrid}>
                               {SFCR_SECTIONS.map((s) => (
-                                <button
-                                  key={s}
-                                  onClick={() => toggleCompanySection(c.id, s)}
-                                  style={{
-                                    ...styles.miniSectionChip,
-                                    ...(selectedSections[c.id]?.includes(s)
-                                      ? styles.miniSectionChipActive
-                                      : {}),
-                                  }}
-                                >
-                                  {s}
-                                </button>
+                                <button key={s} onClick={() => toggleCompanySection(c.id, s)} style={{ ...styles.miniSectionChip, ...(selectedSections[c.id]?.includes(s) ? styles.miniSectionChipActive : {}) }}>{s}</button>
                               ))}
                             </div>
                           </div>
@@ -742,45 +635,35 @@ export default function App() {
                   </section>
                 )}
 
-                {/* RECAPITULATIF */}
+                {/* RÉCAPITULATIF */}
                 {selectedCompanies.length > 0 && (
                   <section style={styles.summaryCard}>
                     <h3 style={styles.summaryTitle}>Récapitulatif</h3>
                     <div style={styles.summaryGrid}>
                       <div style={styles.summaryItem}>
-                        <span style={styles.summaryNum}>
-                          {selectedCompanies.length}
-                        </span>
+                        <span style={styles.summaryNum}>{selectedCompanies.length}</span>
                         <span style={styles.summaryItemLabel}>Entreprise(s)</span>
                       </div>
-
                       <div style={styles.summaryItem}>
-                        <span style={styles.summaryNum}>
-                          {yearMode === "global"
-                            ? globalSections.length
-                            : "Varié"}
-                        </span>
-                        <span style={styles.summaryItemLabel}>Année(s)</span>
+                        <span style={styles.summaryNum}>{yearMode === "global" ? globalYear : "Varié"}</span>
+                        <span style={styles.summaryItemLabel}>Année</span>
                       </div>
-
-
                       <div style={styles.summaryItem}>
-                        <span style={styles.summaryNum}>
-                          {sectionMode === "global"
-                            ? globalSections.length
-                            : "Varié"}
-                        </span>
+                        <span style={styles.summaryNum}>{sectionMode === "global" ? globalSections.length : "Varié"}</span>
                         <span style={styles.summaryItemLabel}>Section(s)</span>
                       </div>
                     </div>
                   </section>
                 )}
 
-                {/* 04. PROMPT */}
-                {selectedCompanies.length == 0 && (
+                {selectedCompanies.length === 0 && (
+                <ChatAgent onReady={handleChatReady} />)}
+
+                {/* 05. PROMPT (si aucune entreprise sélectionnée) */}
+                {/*/*{selectedCompanies.length === 0 && (
                   <section style={styles.card}>
                     <div style={styles.cardHeader}>
-                      <span style={styles.cardBadge}>04</span>
+                      <span style={styles.cardBadge}>05</span>
                       <h2 style={styles.cardTitle}>Prompt d'extraction</h2>
                     </div>
                     <div style={styles.promptWrapper}>
@@ -792,10 +675,8 @@ export default function App() {
                         rows={4}
                       />
                     </div>
-
-                  </section> ) }
-
-
+                  </section>
+                )}*/}
               </div>
             </div>
           </div>

+ 5 - 2
03 - Scripts/sfcr-app/src/constants.js

@@ -25,8 +25,11 @@ export const ALL_COMPANIES = [
   { id: 24, name: "SMA Vie BTP", logo: "SM" },
   { id: 25, name: "SOGECAP", logo: "SO" },
   { id: 26, name: "SPIRICA", logo: "SP" },
-  { id: 27, name: "SURAVENIR SA", logo: "SU" },
-  { id: 28, name: "SwissLife Assurance et Patrimoine", logo: "SW" },
+  { id: 27, name: "SURAVENIR", logo: "SU" },
+  { id: 28, name: "SURAVENIR SA", logo: "SUS" },
+  { id: 29, name: "SwissLife Assurance et Patrimoine", logo: "SW" },
+  { id: 30, name: "La Mondiale ", logo: "MO" },
+  { id: 31, name: "La Mondiale Partenaire", logo: "MP" },
 ];
 
 export const SFCR_SECTIONS = ["S.02", "S.04", "S.05", "S.12", "S.17","S.19", "S.22","S.23", "S.25", "S.28"];

+ 41 - 9
03 - Scripts/workflow_agents.py

@@ -12,33 +12,65 @@ except ImportError:
             return state  # à adapter si besoin
         return RunnableLambda(run_tools)
 
-# Assure-toi que ces imports sont corrects
-from Agents import AgentState, agent_extracteur, agent_builder, agent_ocr, tools
+# agent_llm_vision must also come from the Agents module; it is imported separately below
+from Agents import AgentState, agent_extracteur, agent_builder, agent_ocr
+# 💡 Note: remember to add the agent_llm_vision function to Agents.py!
+try:
+    from Agents import agent_llm_vision
+except ImportError:
+    # Temporary fallback while Agents.py does not define agent_llm_vision: a stub that only prints and returns a placeholder message
+    def agent_llm_vision(state: AgentState):
+        print("🤖 Exécution de l'Agent LLM Vision...")
+        return {"messages": ["Traitement vision effectué"]}
 
 workflow = StateGraph(AgentState)
 
 # 1. Définition des Nœuds
 workflow.add_node("agent_ocr", agent_ocr)
+workflow.add_node("agent_llm_vision", agent_llm_vision) # <-- new node (the "red" node, presumably as drawn in the workflow diagram)
 workflow.add_node("agent_extracteur", agent_extracteur)
 workflow.add_node("agent_builder", agent_builder)
-workflow.add_node("tools", ToolNode(tools))
 
-# 2. Définition des Arêtes
+
+# 2. Conditional routing function
+def route_after_ocr(state: AgentState):
+    """
+    Return the route key to follow after agent_ocr: "vision_path" only when
+    state.get("use_vision") is exactly True, otherwise "extracteur_path".
+    """
+    if state.get("use_vision") is True:
+        return "vision_path"
+    else:
+        return "extracteur_path"
+
+# 3. Edge definitions
 workflow.add_edge(START, "agent_ocr")
-workflow.add_edge("agent_ocr", "agent_extracteur")
+
+# 🔀 Conditional edge (the "if true" branch of the workflow diagram)
+workflow.add_conditional_edges(
+    "agent_ocr",
+    route_after_ocr,
+    {
+        "vision_path": "agent_llm_vision",     # use_vision is True -> go to the LLM vision agent
+        "extracteur_path": "agent_extracteur"  # any other value (including a missing flag) -> go to agent_extracteur
+    }
+)
+
+# Both branches then join at agent_builder, which ends the graph
 workflow.add_edge("agent_extracteur", "agent_builder")
+workflow.add_edge("agent_llm_vision", "agent_builder")
+workflow.add_edge("agent_builder", END)
 
 
-workflow.add_edge("tools",END)
 
 
 # 5. Compilation
 app = workflow.compile() 
 
 # 6. Graph
-"""try:
+try:
     with open("graph_workflow.png", "wb") as f:
         f.write(app.get_graph().draw_mermaid_png())
-    print(" Graphique du workflow généré sous : graph_workflow.png")
+    print("  Graphique du workflow généré sous : graph_workflow.png")
 except Exception as e:
-    print(f" Erreur génération image : {e}")"""
+    print(f"  Erreur génération image : {e}")

BIN
04 - Outputs/2024/CNP_Assurances/Rapport_S.05_page_145.xlsx


BIN
04 - Outputs/2024/CNP_Assurances/Rapport_S.05_page_146.xlsx


BIN
04 - Outputs/2025/ACM_Vie_SA/Rapport_S.02_page_56.xlsx


BIN
04 - Outputs/2025/ACM_Vie_SA/Rapport_S.12_page_59.xlsx


BIN
04 - Outputs/2025/ACM_Vie_SA/Rapport_S.22_page_60.xlsx


BIN
04 - Outputs/2025/CNP_Assurances/Rapport_S.02_page_83.xlsx


BIN
04 - Outputs/2025/CNP_Assurances/Rapport_S.05_page_86.xlsx


BIN
04 - Outputs/2025/CNP_Assurances/Rapport_S.05_page_87.xlsx


BIN
04 - Outputs/2025/CNP_Assurances/Rapport_S.12_page_88.xlsx


BIN
04 - Outputs/2025/CNP_Assurances/Rapport_S.12_page_89.xlsx


BIN
04 - Outputs/2025/CNP_Assurances/Rapport_S.12_page_90.xlsx


BIN
04 - Outputs/2025/Caisse_Générale_de_Prévoyance_(CGP)/Rapport_S.02_page_48.xlsx


BIN
04 - Outputs/2025/Caisse_Générale_de_Prévoyance_(CGP)/Rapport_S.02_page_49.xlsx


BIN
04 - Outputs/2025/Caisse_Générale_de_Prévoyance_(CGP)/Rapport_S.05_page_50.xlsx


BIN
04 - Outputs/2025/Caisse_Générale_de_Prévoyance_(CGP)/Rapport_S.05_page_51.xlsx


BIN
04 - Outputs/2025/Caisse_Générale_de_Prévoyance_(CGP)/Rapport_S.12_page_52.xlsx


BIN
04 - Outputs/2025/Caisse_Générale_de_Prévoyance_(CGP)/Rapport_S.22_page_55.xlsx


BIN
04 - Outputs/2025/Caisse_Générale_de_Prévoyance_(CGP)/Rapport_S.25_page_58.xlsx


BIN
04 - Outputs/2025/Caisse_Générale_de_Prévoyance_(CGP)/Rapport_S.28_page_59.xlsx


BIN
04 - Outputs/2025/Cardif_Assurance_Vie/Rapport_S.02_page_2.xlsx


BIN
04 - Outputs/2025/Cardif_Assurance_Vie/Rapport_S.02_page_3.xlsx


BIN
04 - Outputs/2025/Cardif_Assurance_Vie/Rapport_S.12_page_7.xlsx


BIN
04 - Outputs/2025/Cardif_Assurance_Vie/Rapport_S.22_page_11.xlsx


BIN
04 - Outputs/2025/Cardif_Assurance_Vie/Rapport_S.25_page_13.xlsx


BIN
04 - Outputs/2025/Cardif_Assurance_Vie/Rapport_S.28_page_14.xlsx


BIN
04 - Outputs/2025/PREDICA/Rapport_S.17_page_62.xlsx


BIN
04 - Outputs/2025/PREDICA/Rapport_S.17_page_63.xlsx


BIN
04 - Outputs/2025/PREDICA/Rapport_S.17_page_64.xlsx


BIN
04 - Outputs/2025/PREDICA/Rapport_S.17_page_65.xlsx


BIN
04 - Outputs/2025/PREDICA/Rapport_S.19_page_66.xlsx


BIN
04 - Outputs/2025/PREDICA/Rapport_S.22_page_67.xlsx