# clean_DBSCAN.py

import json
  2. def get_closest_col(x, current_cols):
  3. """Trouve la colonne dont l'X est le plus proche de la valeur."""
  4. return min(current_cols, key=lambda c: abs(x - c[0]))
  5. def transform_to_clean_markdown(data):
  6. current_cols = []
  7. output_lines = []
  8. for entry in data:
  9. points = entry.get("points", [])
  10. if not points:
  11. continue
  12. # 1. Détection et formatage des En-têtes (Cxxxx)
  13. headers_in_row = [(p[0], p[2]) for p in points if str(p[2]).startswith('C')]
  14. if headers_in_row:
  15. # On ajoute un séparateur si un tableau existait déjà avant
  16. if output_lines:
  17. output_lines.append("\n---\n")
  18. current_cols = headers_in_row
  19. col_names = [c[1] for c in current_cols]
  20. # Header propre sans coordonnées
  21. output_lines.append(f"| Code | {' | '.join(col_names)} |")
  22. output_lines.append(f"| :--- | {' | '.join([':---'] * len(col_names))} |")
  23. continue
  24. # 2. Détection et alignement des Données (Rxxxx)
  25. row_label_pt = next((p for p in points if str(p[2]).startswith('R')), None)
  26. if row_label_pt and current_cols:
  27. row_label = row_label_pt[2]
  28. # Initialisation de la ligne avec des "0"
  29. row_dict = {c[1]: "0" for c in current_cols}
  30. for p in points:
  31. x_val, _, text = p
  32. if text == row_label:
  33. continue
  34. # Placement précis via coordonnée X
  35. best_col = get_closest_col(x_val, current_cols)
  36. row_dict[best_col[1]] = str(text).strip()
  37. # Construction de la ligne finale
  38. ordered_values = [row_dict[c[1]] for c in current_cols]
  39. output_lines.append(f"| **{row_label}** | {' | '.join(ordered_values)} |")
  40. # Note : On ignore volontairement le bloc "else" (INFO) pour nettoyer le bruit
  41. return "\n".join(output_lines)
# Example usage (pass the list of entries, not the json module):
# print(transform_to_clean_markdown(data))