server.py 7.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221
  1. from flask import Flask, request, jsonify, Response, stream_with_context
  2. from flask_cors import CORS
  3. import subprocess
  4. import json
  5. import re
  6. import os
  7. import sys
  8. import threading
  9. import queue
  10. import io
  11. app = Flask(__name__) # on cree object __name qui aide Flask a savoir emplacement des fichiers !
  12. CORS(app) # Perlet pour Raect Port 3000 a parler avec FLASK port 5000
  13. if sys.platform == "win32":
  14. sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8')
  15. sys.stderr = io.TextIOWrapper(sys.stderr.buffer, encoding='utf-8')
  16. def build_inputs(data):
  17. """Reconstruit le dict INPUTS à partir du payload React."""
  18. return data.get("inputs", {})
  19. def update_main_py(inputs, main_path):
  20. """Met à jour le bloc INPUTS dans main.py."""
  21. with open(main_path, "r", encoding="utf-8") as f:
  22. content = f.read()
  23. start_idx = content.find("INPUTS = {")
  24. if start_idx != -1:
  25. brace_count = 0
  26. end_idx = start_idx
  27. for i, char in enumerate(content[start_idx:]):
  28. if char == "{":
  29. brace_count += 1
  30. elif char == "}":
  31. brace_count -= 1
  32. if brace_count == 0:
  33. end_idx = start_idx + i + 1
  34. break
  35. content = content[:start_idx] + content[end_idx:].lstrip()
  36. new_inputs_str = f"INPUTS = {json.dumps(inputs, indent=4, ensure_ascii=False)}\n\n"
  37. content = new_inputs_str + content
  38. with open(main_path, "w", encoding="utf-8") as f:
  39. f.write(content)
  40. f.flush()
  41. os.fsync(f.fileno())
  42. def sse_event(event_type, data):
  43. """Formate un event SSE."""
  44. payload = json.dumps({"type": event_type, **data})
  45. return f"data: {payload}\n\n"
  46. @app.route("/run-extraction-stream", methods=["POST"])
  47. def run_extraction_stream():
  48. data = request.json
  49. inputs = build_inputs(data)
  50. script_dir = os.path.dirname(os.path.abspath(__file__))
  51. main_path = os.path.join(script_dir, "main.py")
  52. def generate():
  53. # 1. Mise à jour de main.py
  54. try:
  55. update_main_py(inputs, main_path)
  56. except Exception as e:
  57. yield sse_event("error", {"message": f"Erreur écriture main.py : {str(e)}"})
  58. return
  59. yield sse_event("start", {"message": "Démarrage de l'extraction..."})
  60. # 2. Lancement du subprocess avec stdout/stderr en temps réel
  61. try:
  62. process = subprocess.Popen(
  63. [sys.executable, "-u", main_path], # -u = unbuffered
  64. stdout=subprocess.PIPE,
  65. stderr=subprocess.STDOUT, # merge stderr dans stdout
  66. text=True,
  67. bufsize=1, # line-buffered
  68. cwd=script_dir,
  69. )
  70. except Exception as e:
  71. yield sse_event("error", {"message": f"Impossible de lancer le script : {str(e)}"})
  72. return
  73. # 3. Lecture ligne par ligne et parsing des events
  74. current_company = None
  75. current_section = None
  76. for raw_line in process.stdout:
  77. line = raw_line.rstrip("\n")
  78. if "Erreur construction Excel" in line or "Erreur sur la page" in line:
  79. # On extrait le numéro de page si possible, sinon on utilise la dernière connue
  80. yield sse_event("section_done", {
  81. "company": current_company,
  82. "section": current_section,
  83. "page": page, # assurez-vous que 'page' est défini
  84. "status": "error",
  85. "error": line
  86. })
  87. continue
  88. # Toujours envoyer la ligne brute comme log terminal
  89. yield sse_event("log", {"line": line})
  90. # --- Parsing des marqueurs émis par main.py ---
  91. # Nouvelle entreprise
  92. m = re.search(r"DÉMARRAGE ENTREPRISE\s*:\s*(.+)", line)
  93. if m:
  94. current_company = m.group(1).strip()
  95. yield sse_event("company_start", {"company": current_company})
  96. continue
  97. # Nouvelle page/section
  98. m = re.search(r"Traitement Section\s*:\s*(\S+)\s*[—-]\s*PAGE INDIVIDUELLE\s*:\s*(\d+)", line)
  99. if m:
  100. current_section = m.group(1).strip()
  101. page = m.group(2).strip()
  102. yield sse_event("section_start", {
  103. "company": current_company,
  104. "section": current_section,
  105. "page": page,
  106. })
  107. continue
  108. # Section terminée avec succès (ligne émise par agent_builder)
  109. m = re.search(r"Page (\d+) de la section (\S+) terminée", line)
  110. if m:
  111. page = m.group(1).strip()
  112. section = m.group(2).strip()
  113. yield sse_event("section_done", {
  114. "company": current_company,
  115. "section": section,
  116. "page": page,
  117. "status": "success",
  118. })
  119. continue
  120. # Erreur de page
  121. m = re.search(r"Erreur sur la page (\d+)\s*:\s*(.+)", line)
  122. if m:
  123. page = m.group(1).strip()
  124. error = m.group(2).strip()
  125. yield sse_event("section_done", {
  126. "company": current_company,
  127. "section": current_section,
  128. "page": page,
  129. "status": "error",
  130. "error": error,
  131. })
  132. continue
  133. # Fichier Excel sauvegardé (succès confirmé par agent_builder)
  134. m = re.search(r"✓ Sauvegardé dans\s*:\s*(.+)", line)
  135. if m:
  136. path = m.group(1).strip()
  137. yield sse_event("file_saved", {
  138. "company": current_company,
  139. "path": path,
  140. })
  141. continue
  142. process.wait()
  143. returncode = process.returncode
  144. if returncode == 0:
  145. yield sse_event("done", {"message": "Extraction terminée avec succès ✓", "returncode": 0})
  146. else:
  147. yield sse_event("done", {"message": f"Extraction terminée avec des erreurs (code {returncode})", "returncode": returncode})
  148. return Response(
  149. stream_with_context(generate()),
  150. mimetype="text/event-stream",
  151. headers={
  152. "Cache-Control": "no-cache",
  153. "X-Accel-Buffering": "no", # Désactive le buffering Nginx si présent
  154. },
  155. )
  156. # Ancien endpoint conservé pour compatibilité
  157. @app.route("/run-extraction", methods=["POST"])
  158. def run_extraction():
  159. data = request.json
  160. inputs = build_inputs(data)
  161. script_dir = os.path.dirname(os.path.abspath(__file__))
  162. main_path = os.path.join(script_dir, "main.py")
  163. try:
  164. update_main_py(inputs, main_path)
  165. except Exception as e:
  166. return jsonify({"stdout": "", "stderr": f"Erreur écriture : {str(e)}", "returncode": -1})
  167. try:
  168. result = subprocess.run(
  169. [sys.executable, main_path],
  170. capture_output=True,
  171. text=True,
  172. cwd=script_dir,
  173. timeout=600,
  174. )
  175. return jsonify({
  176. "stdout": result.stdout,
  177. "stderr": result.stderr,
  178. "returncode": result.returncode,
  179. })
  180. except subprocess.TimeoutExpired:
  181. return jsonify({"stdout": "", "stderr": "Timeout dépassé", "returncode": -1})
  182. except Exception as e:
  183. return jsonify({"stdout": "", "stderr": f"Erreur exécution : {str(e)}", "returncode": -1})
  184. if __name__ == "__main__":
  185. app.run(debug=True, port=5000, threaded=True)