import json


def get_closest_col(x, current_cols, max_distance=8.0):
    """Return the column whose x position is nearest to *x*, or ``None``.

    Args:
        x: Horizontal position of the value being placed.
        current_cols: Sequence of column entries whose first item is the
            column's x position (here ``(x_position, name)`` tuples).
        max_distance: Largest accepted absolute gap between *x* and the
            column position, in the same units as *x* (the original author
            notes these are PDF units).

    Returns:
        The nearest entry of *current_cols* when it lies within
        *max_distance*; ``None`` when no column is close enough or when
        *current_cols* is empty.
    """
    # min() raises ValueError on an empty sequence; "no columns" simply means
    # "no column is close enough".
    if not current_cols:
        return None

    best = min(current_cols, key=lambda c: abs(x - c[0]))
    if abs(x - best[0]) <= max_distance:
        return best
    return None


def transform_to_clean_markdown(data):
    """Render extracted table entries as Markdown table text.

    Each item of *data* is a mapping with an optional ``"points"`` list. A
    point is indexed as ``point[0]`` (x position) and ``point[2]`` (text);
    data rows are unpacked as exactly three items ``(x, _, text)``.

    * An entry containing at least one text starting with ``'C'`` is treated
      as a header line: every point whose text does not start with ``'R'``
      becomes a column, and a new Markdown table header is emitted (preceded
      by a ``---`` separator when output already exists).
    * Otherwise, an entry containing a text starting with ``'R'`` is a data
      row: each remaining value is assigned to the nearest column within
      8.0 units, and unfilled columns default to ``"0"``.
    * Entries without points, rows seen before any header, and entries with
      neither marker are skipped.

    Returns:
        The generated lines joined with newlines (an empty string when
        nothing was emitted).
    """
    current_cols = []
    output_lines = []

    for entry in data:
        points = entry.get("points", [])
        if not points:
            continue

        # 1. Header detection: the line is a header as soon as it contains
        #    AT LEAST one "Cxxxx"-style text.
        if any(str(p[2]).startswith('C') for p in points):
            if output_lines:
                output_lines.append("\n---\n")

            # Every point becomes a column (including codes such as 06000),
            # except those that look like a row label (Rxxxx).
            current_cols = [
                (p[0], p[2]) for p in points
                if not str(p[2]).startswith('R')
            ]
            col_names = [c[1] for c in current_cols]
            output_lines.append(f"| Code | {' | '.join(col_names)} |")
            output_lines.append(
                f"| :--- | {' | '.join([':---'] * len(col_names))} |"
            )
            continue

        # 2. Data rows (Rxxxx): need both a row label and known columns.
        row_label_pt = next(
            (p for p in points if str(p[2]).startswith('R')), None
        )
        if not (row_label_pt and current_cols):
            continue

        row_label = row_label_pt[2]
        row_dict = {c[1]: "0" for c in current_cols}

        for p in points:
            x_val, _, text = p
            if text == row_label:
                continue  # the label itself is not a cell value

            best_col = get_closest_col(x_val, current_cols, max_distance=8.0)
            if best_col:
                row_dict[best_col[1]] = str(text).strip()
            # No nearby column -> the value lies outside the table; ignore it.

        ordered_values = [row_dict[c[1]] for c in current_cols]
        output_lines.append(
            f"| **{row_label}** | {' | '.join(ordered_values)} |"
        )

    return "\n".join(output_lines)


#print(transform_to_clean_markdown(json))