# Characters that typically show up when UTF-8 bytes were mis-read as
# latin1/cp1252: U+00C3 ("A" with tilde) and U+00C2 ("A" with circumflex).
_MOJIBAKE_MARKERS = ('\u00c3', '\u00c2')
_REPLACEMENT_CHAR = '\ufffd'
_QUESTIONS_QUERY = (
    'SELECT id, question, answers, last_scraped FROM questions ORDER BY id'
)


def _nfc(value):
    """Return *value* NFC-normalized when it is a str, unchanged otherwise."""
    if isinstance(value, str):
        return unicodedata.normalize('NFC', value)
    return value


def _repair_mojibake(text):
    """Undo a UTF-8 -> cp1252/latin1 double-encoding of *text*.

    The round-trip is strict: a candidate is accepted only when the text can
    be re-encoded losslessly and the resulting bytes are valid UTF-8, so a
    legitimate string that merely contains a marker character is never
    damaged. Returns the repaired string, or None when no repair applies.
    """
    # cp1252 first: it covers characters such as U+2030 that appear in
    # Windows-made mojibake; latin1 covers the C1 control range instead.
    for codec in ('cp1252', 'latin1'):
        try:
            candidate = text.encode(codec).decode('utf-8')
        except UnicodeError:
            continue
        if _REPLACEMENT_CHAR not in candidate:
            return candidate
    return None


def _decode_text(raw):
    """Convert a database value (bytes, str, None or other) to clean text.

    Bytes are decoded as UTF-8 when valid, otherwise as cp1252 and finally
    latin1 (which accepts any byte sequence). Text that looks double-encoded
    is repaired when a strict round-trip succeeds. The result is
    NFC-normalized. None is passed through unchanged; any other type is
    converted with str().
    """
    if raw is None:
        return None

    if isinstance(raw, (bytes, bytearray)):
        data = bytes(raw)
        try:
            text = data.decode('utf-8')
        except UnicodeDecodeError:
            # Not UTF-8: treat it as a legacy single-byte encoding.
            try:
                text = data.decode('cp1252')
            except UnicodeDecodeError:
                text = data.decode('latin1')
            return _nfc(text)
    elif isinstance(raw, str):
        text = raw
    else:
        return _nfc(str(raw))

    if any(marker in text for marker in _MOJIBAKE_MARKERS):
        repaired = _repair_mojibake(text)
        if repaired is not None:
            text = repaired
    return _nfc(text)


def _parse_answers(raw):
    """Turn the stored JSON answers column into a normalized list of dicts.

    Each entry has the keys 'idx' (position), 'text' and 'correct' (bool).
    Missing, malformed or non-list JSON yields an empty list.
    """
    if raw is None:
        return []
    try:
        parsed = json.loads(_decode_text(raw))
    except (ValueError, RecursionError):
        # Best effort: an unreadable answers column means "no answers".
        return []
    if not isinstance(parsed, list):
        return []

    answers = []
    for idx, item in enumerate(parsed):
        # Each item is expected to be a dict with 'text' and 'correct';
        # anything else is shown as its string form and marked incorrect.
        if isinstance(item, dict):
            text, correct = item.get('text'), item.get('correct')
        else:
            text, correct = str(item), False
        answers.append(
            {'idx': idx, 'text': _decode_text(text), 'correct': bool(correct)}
        )
    return answers


def get_questions(db_path=None) -> list:
    """Load every question from the SQLite database, ordered by id.

    Args:
        db_path: Path of the SQLite file to read. Defaults to the
            module-level DB_PATH when omitted.

    Returns:
        A list of dicts with the keys 'id', 'text', 'answers' (see
        _parse_answers) and 'last_scraped'.

    Raises:
        sqlite3.Error: If the database cannot be opened or queried.
    """
    path = DB_PATH if db_path is None else db_path
    conn = sqlite3.connect(path)
    try:
        # Fetch text columns as raw bytes so they can be decoded manually.
        conn.text_factory = bytes
        rows = conn.execute(_QUESTIONS_QUERY).fetchall()
    finally:
        # Close the connection even when the query fails.
        conn.close()

    return [
        {
            'id': qid,
            'text': _decode_text(text),
            'answers': _parse_answers(answers_json),
            'last_scraped': _decode_text(last_scraped),
        }
        for qid, text, answers_json, last_scraped in rows
    ]