1 mesiac pred · 60f8eab15e
--- a/Scripts/Agents.py
+++ b/Scripts/Agents.py
@@ -8,6 +8,7 @@ from langchain_groq import ChatGroq
 
															 import pandas as pd
														
 
															 from langfuse import get_client
														
 
															 import json
														
 
															+from langchain_google_genai import ChatGoogleGenerativeAI 
														
 
															 from tools import  excel_code_interpreter
														
@@ -49,9 +50,11 @@ class AgentState(TypedDict) :
 
															     markdown : str
														
 
															+model_gemini = ChatGoogleGenerativeAI(model="gemini-2.5-flash")
														
 
															+
														
 
															 model_llama = ChatGroq(model="llama-3.3-70b-versatile") 
														
 
															-model_openai = ChatOpenAI(model="gpt-4.1" , temperature=0.2) 
														
 
															+model_openai = ChatOpenAI(model="gpt-4o-mini" , temperature=0.2) 
														
 
															 model_ai = model_llama.bind_tools(tools)
														
@@ -89,7 +92,7 @@ def agent_ocr(state: AgentState):
 
															         # Texte pour le LLM
														
 
															         lignes_page = formater_donnees_section(data_propre, page_index)
														
 
															-        texte_accumule.extend(lignes_page)
														
 
															+        texte_accumule.extend(lignes_page) 
														
 
															         print(f" Page {page_index + 1} traitée par l'OCR.")
														
@@ -106,6 +109,7 @@ def agent_ocr(state: AgentState):
 
															     contenu_final = "\n".join(texte_accumule)
														
 
															     return {
														
 
															+        "points" : points , 
														
 
															         "messages": [HumanMessage(content=f"Voici les données OCR brutes :\n{contenu_final}")],
														
 
															         "clusters" : clusters , 
														
 
															         "lignes" : lignes ,
														
@@ -118,23 +122,25 @@ def agent_ocr(state: AgentState):
 
															 def agent_extracteur(state: AgentState):
														
 
															     prompt = """
														
 
															     Tu es un extracteur de tableaux QRT (Solvabilité II).
														
 
															-    Objectif : convertir le tableau en JSON sans perte d’information.
														
 
															+    Objectif : convertir le tableau markdown en JSON sans AUCUNE modification.
														
 
															-    Règles :
														
 
															+    Règles STRICTES :
														
 
															+    - La position de chaque valeur dans le tableau est ABSOLUE et NE DOIT PAS être modifiée.
														
 
															+    - Si une colonne contient 0, garde 0. Ne déplace jamais une valeur vers une autre colonne.
														
 
															     - Utilise Rxxxx (lignes) et Cxxxx (colonnes) comme clés directement.
														
 
															-    - Copie STRICTEMENT les valeurs, sans calcul.
														
 
															+    - Copie STRICTEMENT les valeurs dans leur colonne exacte.
														
 
															     - Supprime uniquement les espaces dans les nombres : "3 297 388" → 3297388.
														
 
															-    - Garde toute la précision et les 0.
														
 
															+    - Une colonne à 0 reste à 0, même si une valeur non-nulle existe dans une colonne adjacente.
														
 
															     - Structure attendue : {"Cxxxx": {"Rxxxx": valeur, ...}, ...}
														
 
															     ⚠️ CONTRAINTE DE SORTIE :
														
 
															     - Réponds UNIQUEMENT avec l'objet JSON.
														
 
															     - PAS de markdown (```json), PAS de texte, PAS d'explications.
														
 
															+    - NE PAS réorganiser, NE PAS interpréter, NE PAS corriger les données.
														
 
															+
														
 
															+    Tableau à convertir :
														
 
															+    {markdown}
														
 
															-    Sortie attendue :
														
 
															-    {
														
 
															-        "Cxxxx": { "Rxxxx": 123456, "Ryyyy": 789 }
														
 
															-    }
														
 
															     """
														
 
															     markdown_content = state.get("markdown", "")
														
@@ -149,7 +155,7 @@ def agent_extracteur(state: AgentState):
 
															         HumanMessage(content=json.dumps(input_content))
														
 
															     ]
														
 
															-    response = model_openai.invoke(msg)
														
 
															+    response = model_llama.invoke(msg)
														
 
															     return {"messages": [response]}
														
--- a/Scripts/function.py
+++ b/Scripts/function.py
@@ -211,7 +211,7 @@ def prepare_for_dbscan(points):
 
															 from sklearn.cluster import DBSCAN
														
 
															-def cluster_lines(points, eps=0.4, min_samples=2):
														
 
															+def cluster_lines(points, eps=0.9, min_samples=1):
														
 
															     coords = prepare_for_dbscan(points)
														
 
															     db = DBSCAN(eps=eps, min_samples=min_samples)
														
--- a/Scripts/main.py
+++ b/Scripts/main.py
@@ -1,8 +1,6 @@
 
															 INPUTS = {
														
 
															-    "Caisse Générale de Prévoyance (CGP)_2025.pdf": {
														
 
															-        "S.25": [],
														
 
															-        "S.28": [],
														
 
															-        "S.02": []
														
 
															+    "Groupama Gan Vie_2025.pdf": {
														
 
															+        "S.12": []
														
 
															     }
														
 
															 }
														
--- a/Scripts/requirements.txt
+++ b/Scripts/requirements.txt
@@ -8,6 +8,7 @@ langchain-openai==0.2.5
 
															 langgraph==0.2.53
														
 
															 langchain_groq
														
 
															 langfuse==4.2.0
														
 
															+langchain-google-genai
														
 
															 langchain
														
--- a/Scripts/sfcr-app/src/App.jsx
+++ b/Scripts/sfcr-app/src/App.jsx
@@ -576,11 +576,11 @@ export default function App() {
 
															                   )}
														
 
															                 </section>
														
 
															-                {/* 04. ANNEE */}
														
 
															+                {/* 02. ANNEE */}
														
 
															                 {selectedCompanies.length > 0 && (
														
 
															                   <section style={styles.card}>
														
 
															                     <div style={styles.cardHeader}>
														
 
															-                      <span style={styles.cardBadge}>04</span>
														
 
															+                      <span style={styles.cardBadge}>02</span>
														
 
															                       <h2 style={styles.cardTitle}>Année de rapport</h2>
														
 
															                     </div>
														
 
															                     <div style={styles.modeToggle}>
														
@@ -731,10 +731,10 @@ export default function App() {
 
															                   </section>
														
 
															                 )}
														
 
															-                {/* 02. PROMPT */}
														
 
															+                {/* 04. PROMPT */}
														
 
															                 <section style={styles.card}>
														
 
															                   <div style={styles.cardHeader}>
														
 
															-                    <span style={styles.cardBadge}>02</span>
														
 
															+                    <span style={styles.cardBadge}>04</span>
														
 
															                     <h2 style={styles.cardTitle}>Prompt d'extraction</h2>
														
 
															                   </div>
														
 
															                   <div style={styles.promptWrapper}>
														
--- a/Scripts/test_ollama.py
+++ b/Scripts/test_ollama.py
@@ -0,0 +1,53 @@
 
															+import os
														
 
															+import ollama
														
 
															+from pdf2image import convert_from_path
														
 
															+
														
 
															+# Chemin du PDF
														
 
															+script_dir = os.path.dirname(os.path.abspath(__file__))
														
 
															+pdf_path = os.path.join(
														
 
															+    script_dir,
														
 
															+    "..",
														
 
															+    "01 - Sources",
														
 
															+    "2025",
														
 
															+    "CNP Assurances-SFCR-Solo-2025.pdf"
														
 
															+)
														
 
															+
														
 
															+# Pages à traiter
														
 
															+pages = [90]
														
 
															+
														
 
															+prompt = """Extrais uniquement les cellules contenant :
														
 
															+- des codes de lignes (ex: R0010, R0020...)
														
 
															+- des codes de colonnes (ex: C0010, C0020...)
														
 
															+- des valeurs numériques (nombres, %, montants)
														
 
															+
														
 
															+Ignore complètement tout texte descriptif.
														
 
															+
														
 
															+Pour chaque cellule retourne :
														
 
															+- R : numéro de ligne dans le tableau
														
 
															+- C : numéro de colonne dans le tableau
														
 
															+- value : contenu exact de la cellule
														
 
															+
														
 
															+Aucun commentaire, aucun texte supplémentaire.
														
 
															+"""
														
 
															+
														
 
															+for page in pages:
														
 
															+    # Convertir une page PDF en image
														
 
															+    images = convert_from_path(pdf_path, first_page=page, last_page=page,dpi=80)
														
 
															+    
														
 
															+    image_path = f"page_{page}.png"
														
 
															+    images[0].save(image_path, "PNG")
														
 
															+
														
 
															+    # Appel au modèle Ollama (MiniCPM-V)
														
 
															+    response = ollama.chat(
														
 
															+        model="minicpm-v",
														
 
															+        messages=[
														
 
															+            {
														
 
															+                "role": "user",
														
 
															+                "content": prompt,
														
 
															+                "images": [image_path]
														
 
															+            }
														
 
															+        ]
														
 
															+    )
														
 
															+
														
 
															+    print(f"Page {page} :")
														
 
															+    print(response["message"]["content"])
														
--- a/Outputs/2025/ACM_Vie_SA/Rapport_S.02_page_56.xlsx
+++ b/Outputs/2025/ACM_Vie_SA/Rapport_S.02_page_56.xlsx
--- a/Outputs/2025/ACM_Vie_SA/Rapport_S.12_page_59.xlsx
+++ b/Outputs/2025/ACM_Vie_SA/Rapport_S.12_page_59.xlsx
--- a/Outputs/2025/ACM_Vie_SA/Rapport_S.22_page_60.xlsx
+++ b/Outputs/2025/ACM_Vie_SA/Rapport_S.22_page_60.xlsx
--- a/Outputs/2025/CNP_Assurances/Rapport_S.05_page_86.xlsx
+++ b/Outputs/2025/CNP_Assurances/Rapport_S.05_page_86.xlsx
--- a/Outputs/2025/CNP_Assurances/Rapport_S.05_page_87.xlsx
+++ b/Outputs/2025/CNP_Assurances/Rapport_S.05_page_87.xlsx
--- a/Outputs/2025/Caisse_Générale_de_Prévoyance_(CGP)/Rapport_S.12_page_52.xlsx
+++ b/Outputs/2025/Caisse_Générale_de_Prévoyance_(CGP)/Rapport_S.12_page_52.xlsx
--- a/Outputs/2025/Cardif_Assurance_Vie/Rapport_S.02_page_2.xlsx
+++ b/Outputs/2025/Cardif_Assurance_Vie/Rapport_S.02_page_2.xlsx
--- a/Outputs/2025/Cardif_Assurance_Vie/Rapport_S.02_page_3.xlsx
+++ b/Outputs/2025/Cardif_Assurance_Vie/Rapport_S.02_page_3.xlsx
--- a/Outputs/2025/Cardif_Assurance_Vie/Rapport_S.12_page_7.xlsx
+++ b/Outputs/2025/Cardif_Assurance_Vie/Rapport_S.12_page_7.xlsx
--- a/Outputs/2025/Cardif_Assurance_Vie/Rapport_S.22_page_11.xlsx
+++ b/Outputs/2025/Cardif_Assurance_Vie/Rapport_S.22_page_11.xlsx
--- a/Outputs/2025/Cardif_Assurance_Vie/Rapport_S.25_page_13.xlsx
+++ b/Outputs/2025/Cardif_Assurance_Vie/Rapport_S.25_page_13.xlsx
--- a/Outputs/2025/Cardif_Assurance_Vie/Rapport_S.28_page_14.xlsx
+++ b/Outputs/2025/Cardif_Assurance_Vie/Rapport_S.28_page_14.xlsx
--- a/Outputs/2025/Groupama_Gan_Vie/Rapport_S.02_page_71.xlsx
+++ b/Outputs/2025/Groupama_Gan_Vie/Rapport_S.02_page_71.xlsx
--- a/Outputs/2025/Groupama_Gan_Vie/Rapport_S.02_page_72.xlsx
+++ b/Outputs/2025/Groupama_Gan_Vie/Rapport_S.02_page_72.xlsx
--- a/Outputs/2025/Groupama_Gan_Vie/Rapport_S.05_page_73.xlsx
+++ b/Outputs/2025/Groupama_Gan_Vie/Rapport_S.05_page_73.xlsx
--- a/Outputs/2025/Groupama_Gan_Vie/Rapport_S.05_page_74.xlsx
+++ b/Outputs/2025/Groupama_Gan_Vie/Rapport_S.05_page_74.xlsx
--- a/Outputs/2025/Groupama_Gan_Vie/Rapport_S.12_page_75.xlsx
+++ b/Outputs/2025/Groupama_Gan_Vie/Rapport_S.12_page_75.xlsx
--- a/Outputs/2025/Groupama_Gan_Vie/Rapport_S.22_page_78.xlsx
+++ b/Outputs/2025/Groupama_Gan_Vie/Rapport_S.22_page_78.xlsx
--- a/Outputs/2025/Groupama_Gan_Vie/Rapport_S.25_page_81.xlsx
+++ b/Outputs/2025/Groupama_Gan_Vie/Rapport_S.25_page_81.xlsx
--- a/Outputs/2025/Groupama_Gan_Vie/Rapport_S.28_page_82.xlsx
+++ b/Outputs/2025/Groupama_Gan_Vie/Rapport_S.28_page_82.xlsx
--- a/Outputs/2025/SwissLife_Assurance_et_Patrimoine/Rapport_S.02_page_100.xlsx
+++ b/Outputs/2025/SwissLife_Assurance_et_Patrimoine/Rapport_S.02_page_100.xlsx
--- a/Outputs/2025/SwissLife_Assurance_et_Patrimoine/Rapport_S.02_page_99.xlsx
+++ b/Outputs/2025/SwissLife_Assurance_et_Patrimoine/Rapport_S.02_page_99.xlsx
--- a/Outputs/2025/SwissLife_Assurance_et_Patrimoine/Rapport_S.05_page_101.xlsx
+++ b/Outputs/2025/SwissLife_Assurance_et_Patrimoine/Rapport_S.05_page_101.xlsx
--- a/Outputs/2025/SwissLife_Assurance_et_Patrimoine/Rapport_S.12_page_102.xlsx
+++ b/Outputs/2025/SwissLife_Assurance_et_Patrimoine/Rapport_S.12_page_102.xlsx
--- a/Outputs/2025/SwissLife_Assurance_et_Patrimoine/Rapport_S.12_page_103.xlsx
+++ b/Outputs/2025/SwissLife_Assurance_et_Patrimoine/Rapport_S.12_page_103.xlsx
--- a/Outputs/2025/SwissLife_Assurance_et_Patrimoine/Rapport_S.22_page_104.xlsx
+++ b/Outputs/2025/SwissLife_Assurance_et_Patrimoine/Rapport_S.22_page_104.xlsx
--- a/Outputs/2025/SwissLife_Assurance_et_Patrimoine/Rapport_S.25_page_106.xlsx
+++ b/Outputs/2025/SwissLife_Assurance_et_Patrimoine/Rapport_S.25_page_106.xlsx
--- a/Outputs/2025/SwissLife_Assurance_et_Patrimoine/Rapport_S.28_page_107.xlsx
+++ b/Outputs/2025/SwissLife_Assurance_et_Patrimoine/Rapport_S.28_page_107.xlsx