"""Flask backend: runs the SFCR extraction script (streaming and blocking endpoints) and serves an LLM chat agent."""
import io
import json
import os
import queue
import re
import subprocess
import sys
import threading
import tokenize

from flask import Flask, request, jsonify, Response, stream_with_context
from flask_cors import CORS
# Flask uses the import name to work out where the application's files live.
app = Flask(__name__)
# Enable CORS so the React front end (port 3000) can call this API (port 5000).
CORS(app)

# On Windows, force UTF-8 on the standard streams so accented characters and
# symbols can be printed.
if sys.platform == "win32":
    for _stream in (sys.stdout, sys.stderr):
        # reconfigure() changes the encoding in place and keeps the stream's
        # buffering mode; building a new TextIOWrapper around ``.buffer``
        # would drop line buffering and fails when the stream is None or has
        # been replaced by an object without ``.buffer``.
        if hasattr(_stream, "reconfigure"):
            _stream.reconfigure(encoding="utf-8")
    del _stream
def build_inputs(data):
    """Return the ``inputs`` mapping carried by the request payload.

    Args:
        data: Decoded JSON body of the request. May be ``None`` (no body) or
            a non-object JSON value.

    Returns:
        The value stored under ``"inputs"``, or an empty dict when the
        payload is not a JSON object or has no such key.
    """
    # A missing body or a JSON array/scalar has no ``.get``; treat it as empty
    # instead of raising AttributeError.
    if not isinstance(data, dict):
        return {}
    return data.get("inputs", {})
def _to_python_literal(value, indent=4, level=0):
    """Render a JSON-style value as Python source, laid out like ``json.dumps(indent=4)``."""
    inner_pad = " " * (indent * (level + 1))
    outer_pad = " " * (indent * level)
    if isinstance(value, dict):
        if not value:
            return "{}"
        rows = []
        for key, item in value.items():
            if key is not None and not isinstance(key, (str, int, float, bool)):
                raise TypeError(
                    f"keys must be str, int, float, bool or None, not {type(key).__name__}"
                )
            rows.append(
                f"{inner_pad}{_to_python_literal(key)}: "
                f"{_to_python_literal(item, indent, level + 1)}"
            )
        return "{\n" + ",\n".join(rows) + "\n" + outer_pad + "}"
    if isinstance(value, (list, tuple)):
        if not value:
            return "[]"
        rows = [inner_pad + _to_python_literal(item, indent, level + 1) for item in value]
        return "[\n" + ",\n".join(rows) + "\n" + outer_pad + "]"
    if isinstance(value, str):
        # A JSON string literal (non-ASCII kept as-is) is also a valid Python
        # string literal with the same value.
        return json.dumps(value, ensure_ascii=False)
    if isinstance(value, float) and not float("-inf") < value < float("inf"):
        # nan / inf have no literal form; spell them as a float() call.
        return f"float({repr(value)!r})"
    if value is None or isinstance(value, (bool, int, float)):
        # repr gives True / False / None, where JSON would give true / false / null.
        return repr(value)
    raise TypeError(f"Object of type {type(value).__name__} is not serializable")


def _find_block_end(content, start):
    """Return the index just past the brace closing the dict that opens at or after *start*."""
    reader = io.StringIO(content[start:])
    # line_offsets[i] is the absolute offset in *content* of tokenizer row i + 1.
    line_offsets = [start]

    def readline():
        line = reader.readline()
        line_offsets.append(line_offsets[-1] + len(line))
        return line

    # The tokenizer knows about strings and comments, so a brace inside a
    # value such as "a}b" or inside a comment is not counted.
    depth = 0
    try:
        for tok in tokenize.generate_tokens(readline):
            if tok.type != tokenize.OP:
                continue
            if tok.string == "{":
                depth += 1
            elif tok.string == "}":
                depth -= 1
                if depth == 0:
                    row, col = tok.end
                    return line_offsets[row - 1] + col
    except (tokenize.TokenError, SyntaxError):
        # Source the tokenizer rejects: fall back to plain brace counting.
        pass

    depth = 0
    for offset, char in enumerate(content[start:]):
        if char == "{":
            depth += 1
        elif char == "}":
            depth -= 1
            if depth == 0:
                return start + offset + 1
    raise ValueError("bloc INPUTS non terminé (accolades déséquilibrées)")


def update_main_py(inputs, main_path):
    """Rewrite the ``INPUTS = {...}`` block of the script at *main_path*.

    Any existing block that starts a line with ``INPUTS = {`` is removed and a
    new block built from *inputs* is written at the very top of the file,
    followed by a blank line.

    Args:
        inputs: JSON-style value (dicts, lists, strings, numbers, booleans,
            ``None``) to store as ``INPUTS``.
        main_path: Path of the Python script to update.

    Raises:
        OSError: The file cannot be read or written.
        TypeError: *inputs* contains a value that cannot be written as a literal.
        ValueError: The existing ``INPUTS`` block never closes its braces.
    """
    with open(main_path, "r", encoding="utf-8") as f:
        content = f.read()

    # Anchored at line start so that e.g. ``OTHER_INPUTS = {`` is left alone.
    match = re.search(r"^INPUTS = \{", content, flags=re.MULTILINE)
    if match is not None:
        start_idx = match.start()
        end_idx = _find_block_end(content, start_idx)
        content = content[:start_idx] + content[end_idx:].lstrip()

    # Built before the file is reopened for writing, so a serialization error
    # cannot leave the script truncated.
    content = f"INPUTS = {_to_python_literal(inputs)}\n\n" + content

    with open(main_path, "w", encoding="utf-8") as f:
        f.write(content)
        # Push the bytes to disk before returning.
        f.flush()
        os.fsync(f.fileno())
def sse_event(event_type, data):
    """Build one Server-Sent Events frame.

    The frame is a single ``data:`` line holding a JSON object made of
    ``"type": event_type`` plus every entry of *data*, terminated by a blank
    line.
    """
    body = {"type": event_type}
    body.update(data)
    return "data: " + json.dumps(body) + "\n\n"
# Markers looked for in the output of main.py.
_COMPANY_START_RE = re.compile(r"DÉMARRAGE ENTREPRISE\s*:\s*(.+)")
_SECTION_START_RE = re.compile(
    r"Traitement Section\s*:\s*(\S+)\s*[—-]\s*PAGE INDIVIDUELLE\s*:\s*(\d+)"
)
_SECTION_DONE_RE = re.compile(r"Page (\d+) de la section (\S+) terminée")
_PAGE_ERROR_RE = re.compile(r"Erreur sur la page (\d+)")
_FILE_SAVED_RE = re.compile(r"✓ Sauvegardé dans\s*:\s*(.+)")


@app.route("/run-extraction-stream", methods=["POST"])
def run_extraction_stream():
    """Run main.py and stream its progress as Server-Sent Events.

    The ``inputs`` of the JSON request body are written into main.py, the
    script is started as a subprocess, and each output line is turned into
    events of type ``start``, ``log``, ``company_start``, ``section_start``,
    ``section_done``, ``file_saved``, ``error`` and finally ``done``.

    Returns:
        A ``text/event-stream`` response.
    """
    data = request.json
    inputs = build_inputs(data)
    script_dir = os.path.dirname(os.path.abspath(__file__))
    main_path = os.path.join(script_dir, "main.py")

    def generate():
        # 1. Write the inputs into main.py.
        try:
            update_main_py(inputs, main_path)
        except Exception as e:
            yield sse_event("error", {"message": f"Erreur écriture main.py : {str(e)}"})
            return

        yield sse_event("start", {"message": "Démarrage de l'extraction..."})

        # 2. Start the script with stdout/stderr readable in real time.
        # The markers parsed below contain non-ASCII characters, so both ends
        # of the pipe are pinned to UTF-8 instead of the locale code page.
        child_env = {**os.environ, "PYTHONIOENCODING": "utf-8"}
        try:
            process = subprocess.Popen(
                [sys.executable, "-u", main_path],  # -u = unbuffered
                stdout=subprocess.PIPE,
                stderr=subprocess.STDOUT,  # merge stderr into stdout
                text=True,
                encoding="utf-8",
                errors="replace",  # an undecodable byte must not abort the stream
                bufsize=1,  # line-buffered
                cwd=script_dir,
                env=child_env,
            )
        except Exception as e:
            yield sse_event("error", {"message": f"Impossible de lancer le script : {str(e)}"})
            return

        # 3. Read line by line and translate the markers into events.
        current_company = None
        current_section = None
        current_page = None  # last page number seen; None until one is announced
        try:
            for raw_line in process.stdout:
                line = raw_line.rstrip("\n")

                # Error lines are reported as a failed section and are not
                # forwarded as a plain log line.
                if "Erreur construction Excel" in line or "Erreur sur la page" in line:
                    # Use the page number from the line when present,
                    # otherwise the last page seen.
                    page_match = _PAGE_ERROR_RE.search(line)
                    error_page = page_match.group(1) if page_match else current_page
                    yield sse_event("section_done", {
                        "company": current_company,
                        "section": current_section,
                        "page": error_page,
                        "status": "error",
                        "error": line,
                    })
                    continue

                # Every other line is forwarded verbatim as a terminal log.
                yield sse_event("log", {"line": line})

                # New company.
                m = _COMPANY_START_RE.search(line)
                if m:
                    current_company = m.group(1).strip()
                    yield sse_event("company_start", {"company": current_company})
                    continue

                # New page/section.
                m = _SECTION_START_RE.search(line)
                if m:
                    current_section = m.group(1).strip()
                    current_page = m.group(2).strip()
                    yield sse_event("section_start", {
                        "company": current_company,
                        "section": current_section,
                        "page": current_page,
                    })
                    continue

                # Page finished successfully.
                m = _SECTION_DONE_RE.search(line)
                if m:
                    current_page = m.group(1).strip()
                    yield sse_event("section_done", {
                        "company": current_company,
                        "section": m.group(2).strip(),
                        "page": current_page,
                        "status": "success",
                    })
                    continue

                # Excel file saved.
                m = _FILE_SAVED_RE.search(line)
                if m:
                    yield sse_event("file_saved", {
                        "company": current_company,
                        "path": m.group(1).strip(),
                    })
                    continue

            returncode = process.wait()
        finally:
            # Reached with the child still running when the client
            # disconnects (the generator is closed mid-stream): stop the
            # child, since nobody reads its pipe any more.
            if process.poll() is None:
                process.kill()
                process.wait()
            process.stdout.close()

        if returncode == 0:
            yield sse_event("done", {"message": "Extraction terminée avec succès ✓", "returncode": 0})
        else:
            yield sse_event("done", {
                "message": f"Extraction terminée avec des erreurs (code {returncode})",
                "returncode": returncode,
            })

    return Response(
        stream_with_context(generate()),
        mimetype="text/event-stream",
        headers={
            "Cache-Control": "no-cache",
            "X-Accel-Buffering": "no",  # turns off Nginx buffering when present
        },
    )
# Legacy endpoint kept for compatibility.
@app.route("/run-extraction", methods=["POST"])
def run_extraction():
    """Run main.py to completion and return its captured output as JSON.

    The ``inputs`` of the JSON request body are written into main.py, then
    the script is run with a 600-second timeout.

    Returns:
        A JSON object with ``stdout``, ``stderr`` and ``returncode``. Failures
        to write the file, to run the script, or a timeout are reported with
        ``returncode`` -1 and the reason in ``stderr``.
    """
    data = request.json
    inputs = build_inputs(data)
    script_dir = os.path.dirname(os.path.abspath(__file__))
    main_path = os.path.join(script_dir, "main.py")

    try:
        update_main_py(inputs, main_path)
    except Exception as e:
        return jsonify({"stdout": "", "stderr": f"Erreur écriture : {str(e)}", "returncode": -1})

    try:
        result = subprocess.run(
            [sys.executable, main_path],
            capture_output=True,
            text=True,
            # Pin both ends of the pipes to UTF-8: the locale code page would
            # garble, or fail on, the script's non-ASCII output.
            encoding="utf-8",
            errors="replace",
            env={**os.environ, "PYTHONIOENCODING": "utf-8"},
            cwd=script_dir,
            timeout=600,
        )
    except subprocess.TimeoutExpired:
        return jsonify({"stdout": "", "stderr": "Timeout dépassé", "returncode": -1})
    except Exception as e:
        return jsonify({"stdout": "", "stderr": f"Erreur exécution : {str(e)}", "returncode": -1})

    return jsonify({
        "stdout": result.stdout,
        "stderr": result.stderr,
        "returncode": result.returncode,
    })
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_core.messages import HumanMessage, SystemMessage
from dotenv import load_dotenv
from langchain_groq import ChatGroq

# Load the variables of a local .env file into the environment before the LLM
# clients are created (presumably where their API keys live; verify).
load_dotenv()

# Groq-hosted Llama model.
model_llama = ChatGroq(model="llama-3.3-70b-versatile")
# Gemini client; nothing in the visible part of this file references it.
llm = ChatGoogleGenerativeAI(model="gemini-2.0-flash", temperature=0.2)

# System prompt of the chat agent: collect company, year and sections, then
# answer with a bare JSON object whose "ready" flag is true.
SYSTEM_PROMPT = """Tu es un assistant d'extraction SFCR. Sois CONCIS et DIRECT.
Informations à collecter :
1. Entreprise(s) — parmi : Abeille Epargne Retraite, Abeille Vie, ACM Vie SA, ACM Vie SAM, Allianz Vie, ANTARIUS, AXA Assurances Vie Mutuelle, AXA France Vie, BPCE VIE, Caisse Générale de Prévoyance (CGP), Cardif Assurance Vie, CNP Assurances, Generali Vie, GMF Vie, Groupama Gan Vie, HSBC Assurances Vie, La France Mutualiste, MAAF Vie, MACSF Epargne Retraite, MAIF Vie, MMA Vie SA, Prépar Vie, SMA Vie BTP, SOGECAP, SPIRICA, SURAVENIR SA, SwissLife Assurance et Patrimoine
2. Année — ex: 2021, 2022, 2023, 2024, 2025
3. Sections — parmi : S.02, S.05, S.12, S.17, S.22, S.25, S.28
Règles STRICTES :
- Si l'utilisateur écrit un nom approchant (ex: "CNP", "SNCP"), déduis l'entreprise la plus proche de la liste.
- Si l'utilisateur écrit "S0.2" ou "s02", c'est S.02. Sois tolérant sur la syntaxe.
- Pose UNE seule question courte si une info manque.
- Quand tu as tout, retourne UNIQUEMENT ce JSON sans aucun texte :
{"ready": true, "entreprises": ["CNP Assurances"], "année": 2025, "sections": ["S.02"]}
- Réponses courtes, max 1-2 phrases.
- Ne demande jamais de confirmation si tu as déjà l'info."""
@app.route("/chat-agent", methods=["POST"])
def chat_agent():
    """Run one turn of the chat agent.

    The JSON request body carries ``history``, a list of
    ``{"role", "content"}`` messages. The conversation is replayed to the
    model after the system prompt.

    Returns:
        A JSON object with the model's ``reply`` and a ``ready`` flag. When
        the reply is a JSON object whose ``ready`` entry is truthy, ``ready``
        is true and the parsed object is returned under ``data``. A failure
        while calling the model is reported in ``reply`` with ``ready`` false.
    """
    # Imported here because the module-level import only brings in
    # HumanMessage and SystemMessage.
    from langchain_core.messages import AIMessage

    data = request.json
    # A missing or non-object body simply means an empty conversation.
    history = data.get("history", []) if isinstance(data, dict) else []

    # Build the message list for the LLM.
    messages = [SystemMessage(content=SYSTEM_PROMPT)]
    for msg in history:
        role = msg["role"]
        if role == "user":
            messages.append(HumanMessage(content=msg["content"]))
        elif role == "system":
            messages.append(SystemMessage(content=msg["content"]))
        else:
            # Earlier model replies are assistant turns, not extra system
            # instructions.
            messages.append(AIMessage(content=msg["content"]))

    try:
        response = model_llama.invoke(messages)
        reply = response.content.strip()
    except Exception as e:
        return jsonify({"reply": f"Erreur : {str(e)}", "ready": False})

    # Check whether the model returned the final "ready" JSON object.
    try:
        parsed = json.loads(reply)
    except json.JSONDecodeError:
        parsed = None
    # A short reply such as "2024" is valid JSON but not an object, hence the
    # isinstance check before calling .get().
    if isinstance(parsed, dict) and parsed.get("ready"):
        return jsonify({"reply": reply, "ready": True, "data": parsed})

    return jsonify({"reply": reply, "ready": False})
def _run_dev_server():
    """Print the registered routes, then start the Flask server on port 5000."""
    print(app.url_map)
    # NOTE(review): debug=True turns on Flask's debug mode; keep this entry
    # point for local development only.
    app.run(debug=True, port=5000, threaded=True)


if __name__ == "__main__":
    _run_dev_server()