1 bulan lalu · 8cb0b13820
--- a/.gitignore
+++ b/.gitignore
@@ -0,0 +1,17 @@
 
				+
			
 
				+.vscode
			
 
				+
			
 
				+# Modèles Ollama 
			
 
				+ollama_storage/
			
 
				+
			
 
				+# Données utilisateurs et résultats
			
 
				+data/*
			
 
				+outputs/*
			
 
				+!data/.gitkeep
			
 
				+!outputs/.gitkeep
			
 
				+
			
 
				+# Python
			
 
				+__pycache__/
			
 
				+*.py[cod]
			
 
				+.venv/
			
 
				+.env
			
--- a/Agents.py
+++ b/Agents.py
@@ -0,0 +1,260 @@
 
				+import os
			
 
				+from typing import Annotated, Sequence, TypedDict, Optional , List
			
 
				+from dotenv import load_dotenv
			
 
				+from langchain_openai import ChatOpenAI
			
 
				+from langchain_core.messages import BaseMessage, HumanMessage, SystemMessage , AIMessage , ToolMessage
			
 
				+from langgraph.graph.message import add_messages 
			
 
				+from langchain_groq import ChatGroq 
			
 
				+from langfuse import get_client
			
 
				+import pandas as pd
			
 
				+from langfuse.langchain import CallbackHandler
			
 
				+import json
			
 
				+
			
 
				+from langchain_community.chat_models import ChatOllama
			
 
				+ 
			
 
				+# Initialize Langfuse CallbackHandler for Langchain (tracing)
			
 
				+langfuse_handler = CallbackHandler()
			
 
				+
			
 
				+
			
 
				+
			
 
				+# Import de tes outils corrigés
			
 
				+from tools import  excel_code_interpreter,   search_tool , inspect_data , call_tool_and_format_for_ollama , robust_json_parse
			
 
				+
			
 
				+langfuse = get_client()
			
 
				+load_dotenv()
			
 
				+
			
 
				+# 1. Définition du State
			
 
				+class AgentState(TypedDict):
			
 
				+    messages: Annotated[Sequence[BaseMessage], add_messages]
			
 
				+    current_df_path: Optional[str]
			
 
				+    Data_colomns: Optional[str]
			
 
				+    generated_charts : List[str]  
			
 
				+
			
 
				+# 2. Configuration du Modèle et des Outils
			
 
				+#model =ChatGroq(model="llama-3.3-70b-versatile")
			
 
				+
			
 
				+
			
 
				+#model_llama=ChatGoogleGenerativeAI(model="gemini-2.5-flash")
			
 
				+
			
 
				+#model_gpt = ChatOpenAI(model="gpt-5.3-codex" ) 
			
 
				+
			
 
				+#model_gpt_5 = ChatOpenAI(model="o4-mini" )
			
 
				+tools = [ search_tool  , excel_code_interpreter,  inspect_data ]
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+# Local LLM : 
			
 
				+from langchain_experimental.llms.ollama_functions import OllamaFunctions
			
 
				+from langchain_community.chat_models import ChatOllama
			
 
				+
			
 
				+OLLAMA_BASE_URL = os.getenv("OLLAMA_URL", "http://localhost:11434")
			
 
				+
			
 
				+local_llm = ChatOllama(
			
 
				+    model="deepseek-coder-v2:16b-lite-instruct-q4_K_M",
			
 
				+    base_url=OLLAMA_BASE_URL,
			
 
				+    format="json",
			
 
				+    temperature=0
			
 
				+)
			
 
				+
			
 
				+
			
 
				+local_llm_fonction = OllamaFunctions(
			
 
				+    model="deepseek-coder-v2:16b-lite-instruct-q4_K_M",
			
 
				+    #model="llama3.1:8b",
			
 
				+    temperature=0,
			
 
				+    format="json",
			
 
				+    num_ctx=4096
			
 
				+)
			
 
				+
			
 
				+
			
 
				+local_llm_reporter = ChatOllama(
			
 
				+    model="llama3.1:8b",
			
 
				+    temperature=0,
			
 
				+    
			
 
				+)
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+model_with_tools = local_llm_fonction.bind_tools(tools)
			
 
				+
			
 
				+def agent_analyseur(state: AgentState):
			
 
				+    import os
			
 
				+    import pandas as pd
			
 
				+
			
 
				+    # 1. PHASE D'INSPECTION (Identique)
			
 
				+    file_path = state.get("current_df_path")
			
 
				+    inspection_info = "Aucun fichier détecté."
			
 
				+    columns_list = []
			
 
				+
			
 
				+    if file_path and os.path.exists(file_path):
			
 
				+        try:
			
 
				+            df_temp = pd.read_csv(file_path, nrows=5) if file_path.endswith('.csv') else pd.read_excel(file_path, nrows=5)
			
 
				+            columns_list = df_temp.columns.tolist()
			
 
				+            sample_data = df_temp.head(2).to_string()
			
 
				+            inspection_info = (
			
 
				+                f"COLONNES EXACTES : {columns_list}\n"
			
 
				+                f"APERÇU DES DONNÉES :\n{sample_data}"
			
 
				+            )
			
 
				+        except Exception as e:
			
 
				+            inspection_info = f"⚠️ Erreur lors de l'inspection : {e}"
			
 
				+
			
 
				+    # 2. RÉCUPÉRATION DE LA QUERY
			
 
				+    user_query = state["messages"][-1].content if state["messages"] else "Pas de question."
			
 
				+
			
 
				+    # 3. CONSTRUCTION DU PROMPT DYNAMIQUE (Comme l'Exécuteur)
			
 
				+    # On définit les règles métier ici
			
 
				+    prompt = (
			
 
				+        "### SYSTEM ROLE ###\n"
			
 
				+        "Tu es l'Analyseur de Dataltist. Ton rôle est de créer un plan d'action logique.\n"
			
 
				+        "Tu dois répondre EXCLUSIVEMENT au format JSON.\n\n"
			
 
				+        
			
 
				+        "### DONNÉES DISPONIBLES ###\n"
			
 
				+        f"{inspection_info}\n\n"
			
 
				+        
			
 
				+        "### RÈGLES CRITIQUES ###\n"
			
 
				+        "1. PAS DE CODE : Ne génère jamais de Python.\n"
			
 
				+        f"2. COLONNES : Utilise uniquement {columns_list}.\n"
			
 
				+        "3. PLAN : Détaille les étapes (Somme, Moyenne, Regroupement, etc.).\n\n"
			
 
				+        
			
 
				+        "### STRUCTURE JSON ATTENDUE ###\n"
			
 
				+        "{\n"
			
 
				+        "  \"plan\": [\"étape 1\", \"étape 2\"],\n"
			
 
				+        "  \"colonnes_utilisees\": [\"col1\", \"col2\"]\n"
			
 
				+        "}\n"
			
 
				+    )
			
 
				+    #
			
 
				+    # 4. PRÉPARATION DU MESSAGE UNIQUE (La méthode qui marche pour l'Exécuteur)
			
 
				+    # On fusionne le prompt et la question utilisateur dans un seul HumanMessage
			
 
				+    full_input = f"{prompt}\n\n### QUESTION UTILISATEUR ###\n{user_query}"
			
 
				+    
			
 
				+    final_messages = [HumanMessage(content=full_input)]
			
 
				+
			
 
				+    config_analyseur = {
			
 
				+        "callbacks": [langfuse_handler],
			
 
				+        "metadata": {"agent_name": "Analyseur"}
			
 
				+    }
			
 
				+
			
 
				+    # 5. APPEL DU MODÈLE
			
 
				+    # Note : On utilise local_llm (ou analyseur_llm si tu as séparé)
			
 
				+    response = local_llm_chat.invoke(final_messages, config=config_analyseur)
			
 
				+
			
 
				+    return {"messages": [response]}
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+def agent_executor(state: AgentState):
			
 
				+    # On récupère le nom exact du fichier depuis le state
			
 
				+    full_path = state.get("current_df_path") or "data/default.csv"
			
 
				+    file_name = os.path.basename(full_path) # Récupère 'mon_fichier.csv'
			
 
				+    
			
 
				+    # On construit le prompt en injectant le nom dynamiquement
			
 
				+    prompt = (
			
 
				+        "### SYSTEM ROLE ###\n"
			
 
				+        "Tu es un moteur d'exécution Python. Réponds EXCLUSIVEMENT au format JSON.\n\n"
			
 
				+        
			
 
				+        "### FICHIER ACTUEL ###\n"
			
 
				+        f"Fichier : '{file_name}' | Chemin : 'data/{file_name}'\n\n"
			
 
				+        
			
 
				+        "### INSTRUCTIONS DE CODE & SÉCURITÉ ###\n"
			
 
				+        "1. Imports : pandas, matplotlib.pyplot.\n"
			
 
				+        f"2. Chargement : df = pd.read_csv('data/{file_name}')\n"
			
 
				+        
			
 
				+        "3. SYNTAXE (Apostrophes) : Utilise TOUJOURS des doubles guillemets (\"\") pour les labels.\n\n"
			
 
				+        
			
 
				+        "4. GRAPHIQUES (SAUVEGARDE) :\n"
			
 
				+        "   - INTERDIT : plt.show()\n"
			
 
				+        "   - OBLIGATOIRE : plt.savefig('outputs/nom_du_graphe.png')\n\n"
			
 
				+        
			
 
				+        "5. DATA REPORTING (CRUCIAL POUR LE REPORTER) :\n"
			
 
				+        "   - AVANT de générer le graphique, calcule les stats clés (Somme, Moyenne, Top 3).\n"
			
 
				+        "   - Utilise print() pour afficher ces chiffres afin que le Reporter les reçoive.\n\n"
			
 
				+        
			
 
				+        "6. VARIABLE 'result' :\n"
			
 
				+        "   - Termine ton code par une variable 'result' qui récapitule :\n"
			
 
				+        "     1. Les fichiers générés (ex: 'outputs/ventes.png')\n"
			
 
				+        "     2. Un résumé textuel des chiffres affichés par les graphiques.\n\n"
			
 
				+        
			
 
				+        "### EXEMPLE DE CODE INTERNE ATTENDU ###\n"
			
 
				+        "import pandas as pd\n"
			
 
				+        "import matplotlib.pyplot as plt\n"
			
 
				+        "df = pd.read_csv('...')\n"
			
 
				+        "stats = df.groupby('Pays')['Ventes'].sum()\n"
			
 
				+        "print(stats) # <--- Ceci permet au Reporter de voir les chiffres !\n"
			
 
				+        "plt.bar(stats.index, stats.values)\n"
			
 
				+        "plt.savefig('outputs/ventes_pays.png')\n"
			
 
				+        "result = f\"Graphique 'ventes_pays.png' généré. Chiffres clés : {stats.to_dict()}\"\n"
			
 
				+    )
			
 
				+        # Nettoyage des messages pour Ollama (comme avant)
			
 
				+    cleaned_messages = []
			
 
				+    for m in state["messages"]:
			
 
				+        if m.type == "tool":
			
 
				+            cleaned_messages.append(HumanMessage(content=f"Résultat : {m.content}"))
			
 
				+        else:
			
 
				+            cleaned_messages.append(m)
			
 
				+
			
 
				+    # On place le prompt dynamique en premier
			
 
				+    final_messages = [HumanMessage(content=prompt)] + cleaned_messages
			
 
				+
			
 
				+    return {"messages": [model_with_tools.invoke(final_messages)]}
			
 
				+
			
 
				+
			
 
				+
			
 
				+def agent_reporter(state : AgentState) : 
			
 
				+
			
 
				+    prompt_reporter = (
			
 
				+        ### RÔLE ###
			
 
				+"""            Tu es un Expert Consultant en Data Visualisation et Analyse Statistique. 
			
 
				+            Ton objectif est de transformer des données techniques brutes en un rapport stratégique clair, élégant et actionnable.
			
 
				+
			
 
				+            ### DIRECTIVES DE RÉDACTION ###
			
 
				+            1. INTERPRÉTATION : Ne te contente pas de citer les chiffres. Explique ce qu'ils signifient Utilise correctement les chiffres retournés par les outils : assure-toi de bien les comprendre et de les avoir triés.
			
 
				+            2. STRUCTURE : Utilise des titres (##), du gras (**), et des listes à puces pour la clarté.
			
 
				+            3. VISUELS : Mentionne les graphiques générés avec l'émoji 📊.
			
 
				+            4. TON : Professionnel, dynamique et rassurant. 
			
 
				+            5. PAS DE JSON : Ta réponse doit être uniquement du texte formaté en Markdown.
			
 
				+
			
 
				+            ### STRUCTURE DU RAPPORT ###
			
 
				+            ## 📝 Synthèse de l'Analyse
			
 
				+            (Un paragraphe fluide qui résume la situation globale).
			
 
				+
			
 
				+            ## 💡 Points Clés & Insights
			
 
				+            - **[Point 1]** : Explication...
			
 
				+            - **[Point 2]** : Explication...
			
 
				+
			
 
				+            ## 📊 Visualisations Disponibles
			
 
				+            - [Nom du fichier.png] : Brève description de ce que le graphique démontre.
			
 
				+
			
 
				+            ## 🚀 Recommandations si tu n'as
			
 
				+            (Une ou deux phrases sur la prochaine étape suggérée)."""
			
 
				+    )
			
 
				+
			
 
				+
			
 
				+    all_messages = state["messages"]
			
 
				+    
			
 
				+    # On ne garde que :
			
 
				+    # 1. Le premier message (User) pour le contexte
			
 
				+    # 2. Le DERNIER message de l'IA (le code qui a marché)
			
 
				+    # 3. Le DERNIER message de l'outil (les données réelles)
			
 
				+    
			
 
				+    important_messages = [
			
 
				+        all_messages[0], # La question de l'utilisateur
			
 
				+        [m for m in all_messages if isinstance(m, AIMessage)][-1], # Le dernier code
			
 
				+        [m for m in all_messages if isinstance(m, ToolMessage)][-1] # Le dernier résultat
			
 
				+    ]
			
 
				+
			
 
				+            # Nettoyage des messages pour Ollama (comme avant)
			
 
				+    cleaned_messages = []
			
 
				+    for m in important_messages :
			
 
				+        if m.type == "tool":
			
 
				+            cleaned_messages.append(HumanMessage(content=f"Résultat : {m.content}"))
			
 
				+        else:
			
 
				+            cleaned_messages.append(m)
			
 
				+
			
 
				+    # On place le prompt dynamique en premier
			
 
				+    final_messages = [HumanMessage(content=prompt_reporter)] + cleaned_messages
			
 
				+
			
 
				+    return {"messages": [local_llm_reporter.invoke(final_messages)]}
			
 
				+
			
--- a/Dockerfile
+++ b/Dockerfile
@@ -0,0 +1,25 @@
 
				+FROM python:3.13-slim 
			
 
				+
			
 
				+#On se place le dossier de travail interne 
			
 
				+WORKDIR /app
			
 
				+
			
 
				+# Installation des outils pour compiler si besoin
			
 
				+RUN apt-get update && apt-get install -y \
			
 
				+    build-essential \
			
 
				+    && rm -rf /var/lib/apt/lists/*
			
 
				+
			
 
				+# On copie les depandeences et on les install 
			
 
				+COPY requirements.txt .
			
 
				+RUN pip install --no-cache-dir -r requirements.txt
			
 
				+
			
 
				+# On copie tout le code (Agents.py, app.py, tools.py...)
			
 
				+COPY . . 
			
 
				+
			
 
				+#Création des dossier de données et de sortie  
			
 
				+RUN mkdir data outputs 
			
 
				+
			
 
				+#Exposition du port Streamlit 
			
 
				+Expose 8501
			
 
				+
			
 
				+# Lancement
			
 
				+CMD ["streamlit", "run", "app.py", "--server.port=8501", "--server.address=0.0.0.0"]
			
--- a/Dockerfile.ollama
+++ b/Dockerfile.ollama
@@ -0,0 +1,13 @@
 
				+FROM ollama/ollama:latest
			
 
				+
			
 
				+# Installer curl pour que le script de vérification fonctionne
			
 
				+RUN apt-get update && apt-get install -y curl && rm -rf /var/lib/apt/lists/*
			
 
				+
			
 
				+# Copier le script dans le conteneur
			
 
				+COPY entrypoint.sh /entrypoint.sh
			
 
				+
			
 
				+# Donner les permissions d'exécution
			
 
				+RUN chmod +x /entrypoint.sh
			
 
				+
			
 
				+# Définir le script comme commande de démarrage
			
 
				+ENTRYPOINT ["/bin/bash", "/entrypoint.sh"]
			
--- a/app.py
+++ b/app.py
@@ -0,0 +1,158 @@
 
				+import streamlit as st
			
 
				+import os
			
 
				+import glob
			
 
				+import shutil
			
 
				+from workflow_Agent import app  # Ton graphe compilé
			
 
				+from langchain_core.messages import HumanMessage
			
 
				+import pandas as pd
			
 
				+
			
 
				+# --- CONFIGURATION ET NETTOYAGE AU DÉMARRAGE ---
			
 
				+st.set_page_config(page_title="Dataltist AI Assistant", layout="wide")
			
 
				+st.title("📊 Dataltist BI Chat")
			
 
				+
			
 
				+from langfuse.langchain import CallbackHandler
			
 
				+ 
			
 
				+# Initialize Langfuse CallbackHandler for Langchain (tracing)
			
 
				+langfuse_handler = CallbackHandler()
			
 
				+
			
 
				+
			
 
				+# Initialisation du dossier de travail
			
 
				+if "app_initialized" not in st.session_state:
			
 
				+    if os.path.exists("outputs"):
			
 
				+        shutil.rmtree("outputs")
			
 
				+    os.makedirs("outputs", exist_ok=True)
			
 
				+    if not os.path.exists("data"):
			
 
				+        os.makedirs("data", exist_ok=True)
			
 
				+    st.session_state.app_initialized = True
			
 
				+    st.session_state.query_count = 0
			
 
				+    st.session_state.messages = []
			
 
				+
			
 
				+# --- 1. SIDEBAR : GESTION DES DONNÉES ---
			
 
				+file_path = None
			
 
				+
			
 
				+with st.sidebar:
			
 
				+    st.header("📁 Données")
			
 
				+    uploaded_file = st.file_uploader("Charge ton fichier (CSV/Excel)", type=["csv", "xlsx"])
			
 
				+    
			
 
				+    if uploaded_file:
			
 
				+        # 1. Définition et sauvegarde du fichier
			
 
				+        file_path = os.path.join("data", uploaded_file.name)
			
 
				+        with open(file_path, "wb") as f:
			
 
				+            f.write(uploaded_file.getbuffer())
			
 
				+        st.success(f"Fichier '{uploaded_file.name}' prêt !") 
			
 
				+
			
 
				+        df = pd.read_csv(file_path) if file_path.endswith(".csv") else pd.read_excel(file_path)
			
 
				+        columns_list = df.columns.tolist()
			
 
				+
			
 
				+        # 2. Initialisation ou Mise à jour du Session State
			
 
				+        # On utilise le nom exact de ta classe : AgentState
			
 
				+        if "AgentState" not in st.session_state:
			
 
				+            st.session_state.AgentState = {
			
 
				+                "messages": [], # Liste vide pour l'historique
			
 
				+                "current_df_path": file_path, # Le chemin vers le fichier uploadé
			
 
				+                "Data_colomns" : columns_list, 
			
 
				+                "generated_charts": [] # Liste vide pour les futurs PNG
			
 
				+            }
			
 
				+        else:
			
 
				+            # Si l'utilisateur change de fichier, on met à jour le chemin
			
 
				+            st.session_state.AgentState["current_df_path"] = file_path
			
 
				+
			
 
				+  
			
 
				+    st.write("---")
			
 
				+    if st.button("🗑️ Effacer la discussion & fichiers", use_container_width=True):
			
 
				+        st.session_state.messages = []
			
 
				+        st.session_state.query_count = 0
			
 
				+        if os.path.exists("outputs"):
			
 
				+            shutil.rmtree("outputs")
			
 
				+        os.makedirs("outputs", exist_ok=True)
			
 
				+        st.rerun()
			
 
				+
			
 
				+# --- 2. AFFICHAGE DE L'HISTORIQUE ---
			
 
				+for message in st.session_state.messages:
			
 
				+    with st.chat_message(message["role"]):
			
 
				+        st.markdown(message["content"])
			
 
				+        # Réaffichage des graphiques archivés pour ce message
			
 
				+        if "charts" in message and message["charts"]:
			
 
				+            cols = st.columns(min(len(message["charts"]), 2))
			
 
				+            for idx, path in enumerate(message["charts"]):
			
 
				+                if os.path.exists(path):
			
 
				+                    cols[idx % 2].image(path, use_container_width=True)
			
 
				+
			
 
				+# --- 3. INTERACTION AVEC LES AGENTS ---
			
 
				+if prompt := st.chat_input("Pose ta question sur tes données..."):
			
 
				+    if not file_path:
			
 
				+        st.error("Veuillez d'abord charger un fichier dans la barre latérale.")
			
 
				+        st.stop()
			
 
				+
			
 
				+    # Archivage : Création du dossier spécifique pour cette requête
			
 
				+    current_query_id = st.session_state.query_count
			
 
				+    query_folder = os.path.join("outputs", f"query_{current_query_id}")
			
 
				+    os.makedirs(query_folder, exist_ok=True)
			
 
				+
			
 
				+    # Affichage message utilisateur
			
 
				+    st.session_state.messages.append({"role": "user", "content": prompt})
			
 
				+    with st.chat_message("user"):
			
 
				+        st.markdown(prompt)
			
 
				+
			
 
				+    # Appel des agents
			
 
				+    with st.chat_message("assistant"):
			
 
				+        with st.status("Analyse Dataltist en cours...", expanded=True) as status:
			
 
				+            
			
 
				+            inputs = {
			
 
				+                "messages": [HumanMessage(content=prompt)],
			
 
				+                "current_df_path": file_path
			
 
				+            }
			
 
				+            
			
 
				+            # Exécution du graphe
			
 
				+            final_state = app.invoke(
			
 
				+                inputs, 
			
 
				+                config={"callbacks": [langfuse_handler], "run_name": f"Query_{st.session_state.query_count}"}
			
 
				+            )
			
 
				+            
			
 
				+            # Récupération de la réponse finale (Reporter)
			
 
				+            response_text = final_state["messages"][-1].content
			
 
				+            st.markdown(response_text)
			
 
				+            
			
 
				+            # --- GESTION DES FICHIERS GÉNÉRÉS ---
			
 
				+            new_files_paths = []
			
 
				+            # On scanne la racine de 'outputs/' pour trouver ce que l'Exécuteur a créé
			
 
				+            raw_files = [f for f in glob.glob("outputs/*") if os.path.isfile(f)]
			
 
				+            
			
 
				+            if raw_files:
			
 
				+                st.write("---")
			
 
				+                st.subheader("📊 Résultats de l'analyse")
			
 
				+                
			
 
				+                # On déplace les fichiers vers le dossier de la query pour l'historique
			
 
				+                for f in raw_files:
			
 
				+                    dest_path = os.path.join(query_folder, os.path.basename(f))
			
 
				+                    shutil.move(f, dest_path)
			
 
				+                    new_files_paths.append(dest_path)
			
 
				+
			
 
				+                # Affichage des images et fichiers Excel
			
 
				+                images = [p for p in new_files_paths if p.lower().endswith(('.png', '.jpg'))]
			
 
				+                docs = [p for p in new_files_paths if p.lower().endswith(('.xlsx', '.csv'))]
			
 
				+
			
 
				+                if images:
			
 
				+                    cols = st.columns(min(len(images), 2))
			
 
				+                    for idx, img_path in enumerate(images):
			
 
				+                        cols[idx % 2].image(img_path, use_container_width=True, caption=f"Visuel {idx+1}")
			
 
				+                
			
 
				+                if docs:
			
 
				+                    for doc_path in docs:
			
 
				+                        with open(doc_path, "rb") as f:
			
 
				+                            st.download_button(
			
 
				+                                label=f"📥 Télécharger {os.path.basename(doc_path)}",
			
 
				+                                data=f,
			
 
				+                                file_name=os.path.basename(doc_path),
			
 
				+                                mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
			
 
				+                            )
			
 
				+            
			
 
				+            status.update(label="Analyse terminée !", state="complete")
			
 
				+
			
 
				+    # Mise à jour de la session et du compteur
			
 
				+    st.session_state.messages.append({
			
 
				+        "role": "assistant", 
			
 
				+        "content": response_text, 
			
 
				+        "charts": new_files_paths
			
 
				+    })
			
 
				+    st.session_state.query_count += 1
			
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -0,0 +1,29 @@
 
				+version: '3.8'
			
 
				+
			
 
				+services:
			
 
				+  # --- SERVICE OLLAMA (Auto-configuré) ---
			
 
				+  ollama-server:
			
 
				+    build:
			
 
				+      context: .
			
 
				+      dockerfile: Dockerfile.ollama
			
 
				+    container_name: ollama_container
			
 
				+    volumes:
			
 
				+      - ./ollama_storage:/root/.ollama
			
 
				+    ports:
			
 
				+      - "11434:11434"
			
 
				+
			
 
				+  # --- SERVICE TON APP (Streamlit) ---
			
 
				+  dataltist-app:
			
 
				+    build:
			
 
				+      context: .
			
 
				+      dockerfile: Dockerfile  # Ton Dockerfile Python 3.13 habituel
			
 
				+    container_name: dataltist_container
			
 
				+    ports:
			
 
				+      - "8501:8501"
			
 
				+    environment:
			
 
				+      - OLLAMA_URL=http://ollama-server:11434
			
 
				+    depends_on:
			
 
				+      - ollama-server
			
 
				+    volumes:
			
 
				+      - ./data:/app/data
			
 
				+      - ./outputs:/app/outputs
			
--- a/entrypoint.sh
+++ b/entrypoint.sh
@@ -0,0 +1,5 @@
 
				+#!/bin/bash
			
 
				+ollama serve & 
			
 
				+sleep 5
			
 
				+ollama pull deepseek-coder-v2:16b-lite-instruct-q4_K_M llama3.1:8b
			
 
				+wait
			
--- a/requirements.txt
+++ b/requirements.txt
@@ -0,0 +1,21 @@
 
				+# --- Framework Core ---
			
 
				+streamlit
			
 
				+langgraph
			
 
				+langchain
			
 
				+langchain-community
			
 
				+langchain-ollama
			
 
				+langchain-core
			
 
				+langchain-openai
			
 
				+langfuse
			
 
				+
			
 
				+# --- Data Science & Visualisation ---
			
 
				+pandas
			
 
				+openpyxl
			
 
				+matplotlib
			
 
				+seaborn
			
 
				+
			
 
				+# --- Utilitaires ---
			
 
				+python-dotenv
			
 
				+requests
			
 
				+glob
			
 
				+shutil
			
--- a/tools.py
+++ b/tools.py
@@ -0,0 +1,205 @@
 
				+import os 
			
 
				+import pandas as pd
			
 
				+from langchain_core.tools import tool 
			
 
				+from langchain_community.tools import DuckDuckGoSearchRun
			
 
				+import matplotlib.pyplot as plt
			
 
				+import seaborn as sns
			
 
				+from langchain_core.messages import HumanMessage, AIMessage
			
 
				+import sys
			
 
				+import io
			
 
				+
			
 
				+import matplotlib
			
 
				+matplotlib.use('Agg')  # Force Matplotlib à ne pas ouvrir de fenêtre graphique
			
 
				+
			
 
				+@tool
			
 
				+def convert_csv_to_excel(csv_path: str):
			
 
				+    """Convertit un fichier CSV en Excel (.xlsx)."""
			
 
				+    if not os.path.exists(csv_path):
			
 
				+        return f"Désolé, je ne trouve pas le fichier '{csv_path}'. Vérifiez qu'il est bien présent dans le dossier du projet."
			
 
				+    
			
 
				+    try:
			
 
				+        df = pd.read_csv(csv_path)
			
 
				+        new_path = csv_path.replace(".csv", ".xlsx")
			
 
				+        df.to_excel(new_path, index=False)
			
 
				+        return f"Succès : Le fichier a été converti en {new_path}"
			
 
				+    
			
 
				+    except Exception as e:
			
 
				+        return f"Erreur lors de la lecture du CSV : {str(e)}"
			
 
				+    
			
 
				+
			
 
				+
			
 
				+BASE_DIR = os.path.dirname(os.path.abspath(__file__))
			
 
				+
			
 
				+
			
 
				+@tool
			
 
				+def inspect_data(file_name: str):
			
 
				+    """
			
 
				+    Explore le fichier situé dans le dossier 'data/' pour retourner les colonnes et un aperçu.
			
 
				+    Passer uniquement le nom du fichier (ex: 'data.csv').
			
 
				+    """
			
 
				+    # 1. On nettoie le nom du fichier au cas où l'IA ajoute des guillemets
			
 
				+    clean_name = file_name.strip().replace("'", "").replace('"', "")
			
 
				+    
			
 
				+    # 2. On construit le chemin ABSOLU vers le dossier data
			
 
				+    # On suppose que ton script est à la racine du projet
			
 
				+    data_path = os.path.join(os.getcwd(), "data", clean_name)
			
 
				+
			
 
				+    try:
			
 
				+        # 3. Vérification de l'existence du fichier
			
 
				+        if not os.path.exists(data_path):
			
 
				+            return f"Erreur : Le fichier '{clean_name}' est introuvable dans le dossier data/."
			
 
				+
			
 
				+        # 4. Lecture selon l'extension (Note : correction de 'endiwith' en 'endswith')
			
 
				+        if data_path.lower().endswith(".csv"):
			
 
				+            df = pd.read_csv(data_path) # Utilisation de la variable data_path, PAS de la chaîne "file_path"
			
 
				+        elif data_path.lower().endswith((".xlsx", ".xls")):
			
 
				+            df = pd.read_excel(data_path)
			
 
				+        else:
			
 
				+            return "Format de fichier non supporté. Utilisez .csv ou .xlsx"
			
 
				+
			
 
				+        # 5. Extraction des infos
			
 
				+        columns_names = df.columns.tolist()
			
 
				+        preview = df.head(3).to_string()
			
 
				+
			
 
				+        return f"Colonnes trouvées : {columns_names}\n\nAperçu des données :\n{preview}"
			
 
				+    
			
 
				+    except Exception as e:
			
 
				+        return f"Erreur lors de l'inspection : {str(e)}"
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+@tool
			
 
				+def excel_code_interpreter(file_path: str, code: str):
			
 
				+    """Exécute du code Python sur le fichier (CSV ou Excel) chargé dans 'df'."""
			
 
				+    
			
 
				+    import warnings
			
 
				+    warnings.filterwarnings("ignore")
			
 
				+    
			
 
				+    # 1. Nettoyage et construction du chemin
			
 
				+    file_name = os.path.basename(file_path.strip().replace("'", "").replace('"', ""))
			
 
				+    # Assure-toi que BASE_DIR est défini globalement ou remplace-le par os.getcwd()
			
 
				+    data_folder_path = os.path.join(BASE_DIR, "data", file_name)
			
 
				+    root_path = os.path.join(BASE_DIR, file_name)
			
 
				+    
			
 
				+    if os.path.exists(data_folder_path):
			
 
				+        full_path = data_folder_path
			
 
				+    elif os.path.exists(root_path):
			
 
				+        full_path = root_path
			
 
				+    else:
			
 
				+        return f"ERREUR : Fichier '{file_name}' introuvable."
			
 
				+
			
 
				+    # 2. CAPTURE DE LA CONSOLE (stdout)
			
 
				+    # On crée un tampon pour intercepter les print()
			
 
				+    buffer = io.StringIO()
			
 
				+    old_stdout = sys.stdout # On sauvegarde la sortie d'origine (ton terminal)
			
 
				+    sys.stdout = buffer     # On redirige vers notre variable
			
 
				+
			
 
				+    try:
			
 
				+        # Lecture du fichier
			
 
				+        df = pd.read_csv(full_path) if file_name.endswith('.csv') else pd.read_excel(full_path)
			
 
				+
			
 
				+        # 3. Préparation du contexte d'exécution
			
 
				+        context = {
			
 
				+            "df": df,
			
 
				+            "pd": pd,
			
 
				+            "plt": plt,
			
 
				+            "os": os,
			
 
				+            "result": None,
			
 
				+            "__builtins__": __builtins__ 
			
 
				+        }
			
 
				+
			
 
				+        # 4. Exécution du code généré par l'IA
			
 
				+        exec(code, context)
			
 
				+
			
 
				+        # 5. Récupération des données capturées
			
 
				+        output_console = buffer.getvalue()
			
 
				+        final_result_variable = context.get("result", "")
			
 
				+
			
 
				+        # On remet le système à la normale
			
 
				+        sys.stdout = old_stdout
			
 
				+
			
 
				+        # 6. ON FUSIONNE TOUT POUR LE REPORTER
			
 
				+        # C'est ce bloc de texte que le Reporter va recevoir dans Langfuse
			
 
				+        full_report = f"--- RÉSULTATS DE LA CONSOLE ---\n{output_console}\n"
			
 
				+        full_report += f"--- RÉSUMÉ FINAL ---\n{final_result_variable}"
			
 
				+        
			
 
				+        return full_report
			
 
				+
			
 
				+    except Exception as e:
			
 
				+        # En cas d'erreur, on n'oublie pas de remettre le stdout à la normale
			
 
				+        sys.stdout = old_stdout
			
 
				+        return f"ERREUR PYTHON : {str(e)}"
			
 
				+    
			
 
				+
			
 
				+
			
 
				+ddg = DuckDuckGoSearchRun()
			
 
				+@tool 
			
 
				+def search_tool(query : str) : 
			
 
				+    """Recherche sur le web. Limité pour économiser les tokens."""
			
 
				+    try:
			
 
				+        results = ddg.run(query)
			
 
				+        
			
 
				+        if not results:
			
 
				+            return "Aucun résultat trouvé."
			
 
				+            
			
 
				+        # 1. On nettoie les espaces superflus pour gagner des tokens
			
 
				+        clean_results = " ".join(results.split())
			
 
				+        
			
 
				+        # 2. On limite intelligemment (ex: 1200 chars pour plus de contexte)
			
 
				+        # Mais on s'assure de ne pas couper un mot au milieu
			
 
				+        limit = 1200
			
 
				+        if len(clean_results) <= limit:
			
 
				+            return clean_results
			
 
				+        
			
 
				+        return clean_results[:limit] + "... [Résultat tronqué pour économie]"
			
 
				+    except Exception as e:
			
 
				+        return f"Erreur lors de la recherche : {str(e)}"
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+def call_tool_and_format_for_ollama(tool_output):
			
 
				+    """
			
 
				+    Cette fonction transforme la sortie brute de l'outil 
			
 
				+    en un format que ton Llama 3.1 8B local peut comprendre.
			
 
				+    """
			
 
				+    # Au lieu de renvoyer un ToolMessage (qui fait planter Ollama)
			
 
				+    # On crée un message "Observation"
			
 
				+    observation_message = HumanMessage(
			
 
				+        content=f"OBSERVATION DE L'OUTIL :\n{tool_output}\n\nUtilise ces données pour continuer ton analyse."
			
 
				+    )
			
 
				+    return observation_message
			
 
				+
			
 
				+
			
 
				+import re
			
 
				+import json
			
 
				+
			
 
				+def robust_json_parse(text):
			
 
				+    try:
			
 
				+        cleaned = text.strip()
			
 
				+
			
 
				+        # Extraction JSON
			
 
				+        match = re.search(r"\{.*\}", cleaned, re.DOTALL)
			
 
				+        if not match:
			
 
				+            raise ValueError("Aucun JSON détecté")
			
 
				+
			
 
				+        json_str = match.group()
			
 
				+
			
 
				+        # Parsing direct
			
 
				+        try:
			
 
				+            return json.loads(json_str)
			
 
				+        except json.JSONDecodeError:
			
 
				+            pass
			
 
				+
			
 
				+        # Auto-fix
			
 
				+        json_str = json_str.replace("\n", " ")
			
 
				+        json_str = re.sub(r",\s*}", "}", json_str)
			
 
				+        json_str = re.sub(r",\s*]", "]", json_str)
			
 
				+
			
 
				+        return json.loads(json_str)
			
 
				+
			
 
				+    except Exception as e:
			
 
				+        raise ValueError(f"Parsing JSON échoué : {e}\nRAW:\n{text}")
			
--- a/workflow_Agent.py
+++ b/workflow_Agent.py
@@ -0,0 +1,67 @@
 
				+from langgraph.graph import StateGraph , START , END 
			
 
				+from langgraph.prebuilt import ToolNode, tools_condition
			
 
				+from Agents import AgentState , agent_analyseur , agent_executor , tools , agent_reporter
			
 
				+from langchain_core.messages import AIMessage , ToolMessage
			
 
				+
			
 
				+
			
 
				+# 1. Initialisation du graphe avec l'état personnalisé
			
 
				+workflow = StateGraph(AgentState)
			
 
				+
			
 
				+# 2. Ajout des nœuds
			
 
				+workflow.add_node("analyseur", agent_analyseur)
			
 
				+workflow.add_node("executor", agent_executor)
			
 
				+workflow.add_node("reporter", agent_reporter)
			
 
				+workflow.add_node("tools", ToolNode(tools))
			
 
				+
			
 
				+# 3. Définition des arêtes stables
			
 
				+workflow.add_edge(START, "analyseur")
			
 
				+workflow.add_edge("analyseur", "executor") 
			
 
				+workflow.add_edge("reporter", END)      
			
 
				+
			
 
				+# 4. Logique de routage personnalisée
			
 
				+def router(state: AgentState):
			
 
				+    messages = state["messages"]
			
 
				+    last_message = messages[-1]
			
 
				+
			
 
				+    # CAS 1 : L'agent demande un outil
			
 
				+    if hasattr(last_message, "tool_calls") and last_message.tool_calls:
			
 
				+        return "tools"
			
 
				+
			
 
				+    # CAS 2 : Analyse du retour de l'outil 
			
 
				+    if isinstance(last_message, ToolMessage):
			
 
				+        content_upper = last_message.content.upper()
			
 
				+        
			
 
				+        # On détecte ton marqueur spécifique ou le mot "ERROR"
			
 
				+        if "ERREUR PYTHON" in content_upper or "ERROR" in content_upper:
			
 
				+            print("--- LOG : Erreur détectée, renvoi à l'exécuteur pour correction ---")
			
 
				+            return "executor" 
			
 
				+        
			
 
				+        # Si pas d'erreur, on peut passer à la synthèse
			
 
				+        return "reporter"
			
 
				+
			
 
				+    # CAS 3 : Par défaut
			
 
				+    return "reporter"
			
 
				+
			
 
				+# On applique la condition sur l'executor
			
 
				+workflow.add_conditional_edges(
			
 
				+    "executor",
			
 
				+    router,
			
 
				+    {"tools": "tools", "reporter": "reporter"}
			
 
				+)
			
 
				+
			
 
				+workflow.add_conditional_edges(
			
 
				+    "tools" , 
			
 
				+    router , 
			
 
				+    {"executor": "executor", "reporter": "reporter"}
			
 
				+)
			
 
				+
			
 
				+
			
 
				+
			
 
				+# 5. Compilation
			
 
				+app = workflow.compile()
			
 
				+
			
 
				+# Pour sauvegarder l'image du workflow
			
 
				+with open("graph_workflow.png", "wb") as f:
			
 
				+    f.write(app.get_graph().draw_mermaid_png())
			
 
				+
			
 
				+print("Graphique du workflow généré sous : graph_workflow.png")