| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960 |
- import json
def get_closest_col(x, current_cols, max_distance=8.0):
    """Return the column closest to ``x``, or ``None`` if none is near enough.

    Args:
        x: Horizontal position of the value being placed.
        current_cols: Iterable of column entries whose first item is the
            column's x position (the caller in this file passes
            ``(x, label)`` tuples).
        max_distance: Largest accepted ``abs(x - column_x)``, inclusive.
            The original note describes this as PDF units.

    Returns:
        The entry of ``current_cols`` nearest to ``x`` (the first one on a
        tie), or ``None`` when ``current_cols`` is empty or the nearest
        column is farther away than ``max_distance``.
    """
    # default=None avoids the ValueError that min() raises on an empty iterable.
    best = min(current_cols, key=lambda col: abs(x - col[0]), default=None)
    if best is None:
        return None
    if abs(x - best[0]) <= max_distance:
        return best
    return None  # No column is close enough.
def transform_to_clean_markdown(data):
    """Render extracted table points as Markdown tables.

    Args:
        data: Iterable of dict entries. Each entry's ``"points"`` value is a
            list of points read as ``p[0]`` (x position) and ``p[2]`` (text);
            ``p[1]`` is not used. Entries with no points are skipped.

    Returns:
        A single string of Markdown lines joined with newlines. Each header
        entry (an entry with at least one text starting with ``"C"``) starts a
        new table, separated from the previous output by ``---``. Each
        following entry that has a text starting with ``"R"`` becomes one row
        of that table. Returns ``""`` when nothing is produced.
    """
    current_cols = []
    output_lines = []

    for entry in data:
        points = entry.get("points", [])
        if not points:
            continue

        # 1. Header detection: the entry is a header line if at least one
        #    point's text starts with "C".
        if any(str(p[2]).startswith("C") for p in points):
            if output_lines:
                output_lines.append("\n---\n")
            # Every point becomes a column (including labels that do not
            # start with "C"), except those that look like a row label ("R...").
            current_cols = [
                (p[0], p[2]) for p in points if not str(p[2]).startswith("R")
            ]
            # str() so that non-string labels (e.g. numbers) cannot break join().
            col_names = [str(label) for _, label in current_cols]
            output_lines.append(f"| Code | {' | '.join(col_names)} |")
            output_lines.append(
                f"| :--- | {' | '.join([':---'] * len(col_names))} |"
            )
            continue

        # 2. Data line: needs a row label ("R...") and a previously seen header.
        row_label_pt = next(
            (p for p in points if str(p[2]).startswith("R")), None
        )
        if row_label_pt is None or not current_cols:
            continue

        row_label = row_label_pt[2]
        # One slot per column, by position, so that two columns sharing the
        # same label keep separate values. Unfilled cells default to "0".
        row_values = ["0"] * len(current_cols)
        for p in points:
            x_val, text = p[0], p[2]
            if text == row_label:
                continue
            best_col = get_closest_col(x_val, current_cols, max_distance=8.0)
            if best_col is not None:
                row_values[current_cols.index(best_col)] = str(text).strip()
            # No column close enough: the value is ignored (outside the table).

        output_lines.append(f"| **{row_label}** | {' | '.join(row_values)} |")

    return "\n".join(output_lines)
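# Example usage (data is a list of {"points": [[x, _, text], ...]} entries,
# not the json module itself):
# print(transform_to_clean_markdown(data))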
|