| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221 |
- from flask import Flask, request, jsonify, Response, stream_with_context
- from flask_cors import CORS
- import subprocess
- import json
- import re
- import os
- import sys
- import threading
- import queue
- import io
# Create the Flask application; passing __name__ lets Flask locate files
# relative to this module.
app = Flask(__name__)
# Allow cross-origin requests so the React front end (port 3000 according to
# the original note) can call this Flask server (port 5000).
CORS(app)

if sys.platform == "win32":
    # Force UTF-8 on the console streams under Windows. reconfigure() changes
    # the encoding of the existing stream in place, so its buffering settings
    # are kept and no second wrapper is stacked on the same binary buffer.
    for _stream_name in ("stdout", "stderr"):
        _stream = getattr(sys, _stream_name)
        if hasattr(_stream, "reconfigure"):
            _stream.reconfigure(encoding="utf-8")
        elif hasattr(_stream, "buffer"):
            # Stream replaced by an object without reconfigure(): fall back to
            # wrapping its binary buffer.
            setattr(
                sys,
                _stream_name,
                io.TextIOWrapper(_stream.buffer, encoding="utf-8"),
            )
def build_inputs(data):
    """Return the ``inputs`` mapping carried by the React payload.

    Args:
        data: Decoded JSON body of the request, expected to be a dict with an
            ``"inputs"`` key.

    Returns:
        The value stored under ``"inputs"``. An empty dict is returned when
        the payload is missing, is not a dict, or has no (or a null)
        ``"inputs"`` entry.
    """
    if not isinstance(data, dict):
        # No JSON body (or a non-object body): nothing to extract.
        return {}
    inputs = data.get("inputs")
    return {} if inputs is None else inputs
def _python_literal(value, indent=4, level=0):
    """Render JSON-like data as Python source text, one item per line."""
    inner_pad = " " * (indent * (level + 1))
    outer_pad = " " * (indent * level)
    if isinstance(value, dict):
        if not value:
            return "{}"
        rows = [
            f"{inner_pad}{key!r}: {_python_literal(item, indent, level + 1)}"
            for key, item in value.items()
        ]
        return "{\n" + ",\n".join(rows) + f",\n{outer_pad}}}"
    if isinstance(value, (list, tuple)):
        if not value:
            return "[]"
        rows = [
            f"{inner_pad}{_python_literal(item, indent, level + 1)}"
            for item in value
        ]
        return "[\n" + ",\n".join(rows) + f",\n{outer_pad}]"
    if isinstance(value, float) and (
        value != value or value in (float("inf"), float("-inf"))
    ):
        # repr() of nan/inf is not a valid Python expression.
        return f"float({str(value)!r})"
    return repr(value)


def _find_closing_brace(text, open_idx):
    """Return the index just past the brace matching ``text[open_idx]``, or -1.

    Braces inside quoted strings and ``#`` comments are ignored.
    """
    depth = 0
    quote = None
    escaped = False
    in_comment = False
    for pos in range(open_idx, len(text)):
        char = text[pos]
        if in_comment:
            if char == "\n":
                in_comment = False
            continue
        if quote is not None:
            if escaped:
                escaped = False
            elif char == "\\":
                escaped = True
            elif char == quote:
                quote = None
            continue
        if char in "'\"":
            quote = char
        elif char == "#":
            in_comment = True
        elif char == "{":
            depth += 1
        elif char == "}":
            depth -= 1
            if depth == 0:
                return pos + 1
    return -1


def update_main_py(inputs, main_path):
    """Replace the ``INPUTS`` block of main.py with the given inputs.

    The existing ``INPUTS = {...}`` assignment, if any, is removed and a new
    one is written at the very top of the file, followed by the rest of the
    original content.

    Args:
        inputs: JSON-like data (dicts, lists, strings, numbers, booleans,
            None) to store in ``INPUTS``.
        main_path: Path of the script to rewrite.

    Raises:
        ValueError: If an ``INPUTS = {`` block is found but its closing brace
            is not. The file is left untouched in that case.
        OSError: If the file cannot be read or written.
    """
    with open(main_path, "r", encoding="utf-8") as f:
        content = f.read()

    marker = "INPUTS = {"
    start_idx = content.find(marker)
    if start_idx != -1:
        # The opening brace is the last character of the marker.
        end_idx = _find_closing_brace(content, start_idx + len(marker) - 1)
        if end_idx == -1:
            # Prepending a new block while the old one stays in the file
            # would let the old assignment win at runtime; fail loudly.
            raise ValueError(
                "Bloc INPUTS mal formé dans main.py : accolade fermante introuvable"
            )
        content = content[:start_idx] + content[end_idx:].lstrip()

    # Emit a Python literal (True/False/None), not JSON (true/false/null),
    # because main.py executes this text as Python source.
    new_inputs_str = f"INPUTS = {_python_literal(inputs)}\n\n"
    content = new_inputs_str + content

    with open(main_path, "w", encoding="utf-8") as f:
        f.write(content)
        # Push the data to disk before the caller launches main.py.
        f.flush()
        os.fsync(f.fileno())
def sse_event(event_type, data):
    """Build one Server-Sent Events frame.

    The frame is a single ``data:`` line holding a JSON object made of a
    ``"type"`` entry followed by the entries of ``data``, terminated by a
    blank line.
    """
    body = {"type": event_type}
    # Entries of `data` are merged last, so they win on a key collision.
    body.update(data)
    return "data: " + json.dumps(body) + "\n\n"
# Markers printed by main.py that drive the progress events of the streaming
# endpoint. Compiled once instead of on every output line.
_COMPANY_START_RE = re.compile(r"DÉMARRAGE ENTREPRISE\s*:\s*(.+)")
_SECTION_START_RE = re.compile(
    r"Traitement Section\s*:\s*(\S+)\s*[—-]\s*PAGE INDIVIDUELLE\s*:\s*(\d+)"
)
_SECTION_DONE_RE = re.compile(r"Page (\d+) de la section (\S+) terminée")
_PAGE_ERROR_RE = re.compile(r"Erreur sur la page (\d+)")
_FILE_SAVED_RE = re.compile(r"✓ Sauvegardé dans\s*:\s*(.+)")
_ERROR_MARKERS = ("Erreur construction Excel", "Erreur sur la page")


@app.route("/run-extraction-stream", methods=["POST"])
def run_extraction_stream():
    """Run main.py and stream its progress as Server-Sent Events.

    The request's JSON inputs are written into main.py, the script is started
    as a subprocess, and every output line is forwarded as a ``log`` event.
    Recognised marker lines additionally produce ``company_start``,
    ``section_start``, ``section_done`` and ``file_saved`` events. The stream
    starts with ``start`` and ends with ``done``, or with ``error`` if main.py
    cannot be written or launched.

    Returns:
        A ``text/event-stream`` response.
    """
    data = request.json
    inputs = build_inputs(data)
    script_dir = os.path.dirname(os.path.abspath(__file__))
    main_path = os.path.join(script_dir, "main.py")

    def generate():
        # 1. Update main.py with the requested inputs.
        try:
            update_main_py(inputs, main_path)
        except Exception as e:
            yield sse_event("error", {"message": f"Erreur écriture main.py : {str(e)}"})
            return

        yield sse_event("start", {"message": "Démarrage de l'extraction..."})

        # 2. Launch the subprocess with stdout/stderr streamed in real time.
        #    Both ends of the pipe are forced to UTF-8 so the non-ASCII
        #    markers matched below survive on platforms whose locale encoding
        #    is not UTF-8.
        child_env = dict(os.environ, PYTHONIOENCODING="utf-8")
        try:
            process = subprocess.Popen(
                [sys.executable, "-u", main_path],  # -u = unbuffered
                stdout=subprocess.PIPE,
                stderr=subprocess.STDOUT,  # merge stderr into stdout
                text=True,
                encoding="utf-8",
                errors="replace",
                bufsize=1,  # line-buffered
                cwd=script_dir,
                env=child_env,
            )
        except Exception as e:
            yield sse_event("error", {"message": f"Impossible de lancer le script : {str(e)}"})
            return

        # 3. Read line by line and translate markers into events.
        current_company = None
        current_section = None
        # Last page number seen; stays None until a marker provides one, so
        # an early error line no longer hits an unbound variable.
        current_page = None
        try:
            for raw_line in process.stdout:
                line = raw_line.rstrip("\n")

                # Always forward the raw line as a terminal log entry.
                yield sse_event("log", {"line": line})

                # Error lines: use the page number from the line when it has
                # one, otherwise fall back to the last known page.
                if any(marker in line for marker in _ERROR_MARKERS):
                    m = _PAGE_ERROR_RE.search(line)
                    if m:
                        current_page = m.group(1).strip()
                    yield sse_event("section_done", {
                        "company": current_company,
                        "section": current_section,
                        "page": current_page,
                        "status": "error",
                        "error": line,
                    })
                    continue

                # New company.
                m = _COMPANY_START_RE.search(line)
                if m:
                    current_company = m.group(1).strip()
                    yield sse_event("company_start", {"company": current_company})
                    continue

                # New page/section.
                m = _SECTION_START_RE.search(line)
                if m:
                    current_section = m.group(1).strip()
                    current_page = m.group(2).strip()
                    yield sse_event("section_start", {
                        "company": current_company,
                        "section": current_section,
                        "page": current_page,
                    })
                    continue

                # Section finished successfully.
                m = _SECTION_DONE_RE.search(line)
                if m:
                    current_page = m.group(1).strip()
                    section = m.group(2).strip()
                    yield sse_event("section_done", {
                        "company": current_company,
                        "section": section,
                        "page": current_page,
                        "status": "success",
                    })
                    continue

                # Excel file saved.
                m = _FILE_SAVED_RE.search(line)
                if m:
                    path = m.group(1).strip()
                    yield sse_event("file_saved", {
                        "company": current_company,
                        "path": path,
                    })
                    continue

            returncode = process.wait()
        finally:
            # Reached on normal completion and when the generator is closed
            # early (e.g. client disconnect): do not leave the child running
            # or the pipe open.
            if process.poll() is None:
                process.terminate()
                try:
                    process.wait(timeout=5)
                except subprocess.TimeoutExpired:
                    process.kill()
                    process.wait()
            process.stdout.close()

        if returncode == 0:
            yield sse_event("done", {"message": "Extraction terminée avec succès ✓", "returncode": 0})
        else:
            yield sse_event("done", {"message": f"Extraction terminée avec des erreurs (code {returncode})", "returncode": returncode})

    return Response(
        stream_with_context(generate()),
        mimetype="text/event-stream",
        headers={
            "Cache-Control": "no-cache",
            "X-Accel-Buffering": "no",  # disables Nginx buffering if present
        },
    )
# Legacy endpoint kept for backward compatibility.
@app.route("/run-extraction", methods=["POST"])
def run_extraction():
    """Run main.py to completion and return its captured output as JSON.

    The request's JSON inputs are written into main.py, then the script is
    executed with a 600-second timeout.

    Returns:
        A JSON object with ``stdout``, ``stderr`` and ``returncode``. Failures
        to write main.py, timeouts and launch errors are reported with
        ``returncode`` -1 and the message in ``stderr``.
    """
    data = request.json
    inputs = build_inputs(data)
    script_dir = os.path.dirname(os.path.abspath(__file__))
    main_path = os.path.join(script_dir, "main.py")

    try:
        update_main_py(inputs, main_path)
    except Exception as e:
        return jsonify({"stdout": "", "stderr": f"Erreur écriture : {str(e)}", "returncode": -1})

    try:
        result = subprocess.run(
            [sys.executable, main_path],
            capture_output=True,
            text=True,
            # Force UTF-8 on both ends of the pipe so non-ASCII output from
            # main.py is neither rejected by the child nor mis-decoded here
            # when the locale encoding is not UTF-8.
            encoding="utf-8",
            errors="replace",
            env=dict(os.environ, PYTHONIOENCODING="utf-8"),
            cwd=script_dir,
            timeout=600,
        )
    except subprocess.TimeoutExpired:
        return jsonify({"stdout": "", "stderr": "Timeout dépassé", "returncode": -1})
    except Exception as e:
        return jsonify({"stdout": "", "stderr": f"Erreur exécution : {str(e)}", "returncode": -1})

    return jsonify({
        "stdout": result.stdout,
        "stderr": result.stderr,
        "returncode": result.returncode,
    })
if __name__ == "__main__":
    # Start the development server on port 5000 with threaded request
    # handling. NOTE: debug=True turns on Flask's debug mode; keep it for
    # local development only.
    app.run(
        port=5000,
        threaded=True,
        debug=True,
    )
|