From a72f50234210e5a186615f30d712de3de675fe69 Mon Sep 17 00:00:00 2001 From: Puechberty Arthur Date: Mon, 30 Mar 2026 20:42:29 +0200 Subject: [PATCH] first commit --- .dockerignore | 15 ++ .gitignore | 35 ++++ Dockerfile | 23 +++ README.md | 53 ++++++ docker-compose.yml | 23 +++ main.py | 172 ++++++++++++++++++++ requirements.txt | 10 ++ static/css/style.css | 62 +++++++ templates/base.html | 34 ++++ templates/external.html | 11 ++ templates/index.html | 349 ++++++++++++++++++++++++++++++++++++++++ templates/result.html | 36 +++++ webapp.py | 178 ++++++++++++++++++++ 13 files changed, 1001 insertions(+) create mode 100644 .dockerignore create mode 100644 .gitignore create mode 100644 Dockerfile create mode 100644 README.md create mode 100644 docker-compose.yml create mode 100644 main.py create mode 100644 requirements.txt create mode 100644 static/css/style.css create mode 100644 templates/base.html create mode 100644 templates/external.html create mode 100644 templates/index.html create mode 100644 templates/result.html create mode 100644 webapp.py diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000..d8cbb3d --- /dev/null +++ b/.dockerignore @@ -0,0 +1,15 @@ +__pycache__/ +*.pyc +.venv/ +venv/ +env/ +.env +.git +*.db +__pycache__ +*.egg-info +node_modules/ +/.vscode +/.idea +build/ +dist/ \ No newline at end of file diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..10c0a2d --- /dev/null +++ b/.gitignore @@ -0,0 +1,35 @@ +# Python +__pycache__/ +*.py[cod] +*.so +.Python +.pytest_cache/ +.mypy_cache/ + +# Virtual environments +venv/ +.venv/ +env/ + +# Local environment and secrets +.env +.env.* +!.env.example + +# Local database and runtime files +*.db +*.sqlite +*.sqlite3 + +# IDE/editor +.vscode/ +.idea/ + +# OS +.DS_Store +Thumbs.db + +# Build/distribution +build/ +dist/ +*.egg-info/ diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..fa6730c --- /dev/null +++ b/Dockerfile @@ -0,0 +1,23 @@ 
+FROM python:3.11-slim + +ENV PYTHONDONTWRITEBYTECODE=1 +ENV PYTHONUNBUFFERED=1 + +WORKDIR /app + +# system deps for building some packages +RUN apt-get update && apt-get install -y --no-install-recommends \ + build-essential \ + && rm -rf /var/lib/apt/lists/* + +# copy requirements first for caching +COPY requirements.txt /app/requirements.txt +RUN pip install --no-cache-dir -r /app/requirements.txt + +# copy app +COPY . /app + +EXPOSE 5000 + +# default command runs the webapp; docker-compose will override for scraper service +CMD ["python", "webapp.py"] diff --git a/README.md b/README.md new file mode 100644 index 0000000..79bb371 --- /dev/null +++ b/README.md @@ -0,0 +1,53 @@ +# QCM Physique + +Application Flask pour s'entrainer avec des QCM de physique. + +Projet en ligne: [qcu.arthurp.fr](https://qcu.arthurp.fr) + +## Fonctionnalites + +- Scraping periodique des questions depuis une source distante. +- Stockage local dans SQLite (`qcm.db`). +- Interface web avec mode complet et mode aleatoire. +- Affichage du score et correction des reponses. + +## Stack technique + +- Python 3.11 +- Flask +- SQLite +- Docker / Docker Compose + +## Lancer le projet en local avec Docker + +Prerequis: + +- Docker +- Docker Compose + +Commandes: + +```bash +docker compose build +docker compose up -d +``` + +Application web disponible sur: http://localhost:5000 + +## Services Docker + +- `web`: demarre l'application Flask. +- `scraper`: execute `main.py` en boucle pour mettre a jour `qcm.db`. 
def _parse_correct_flags(js_text):
    """Map ``(question index, answer index)`` to the correctness flag found in *js_text*.

    The page's inline script declares each answer as
    ``I[q][3][a]=new Array('text', '', flag, n, n);``.  Every such entry is
    collected in a single pass so callers can do a dictionary lookup per
    answer instead of re-scanning the script.

    Args:
        js_text: Source text of the script holding the ``I`` array (may be empty).

    Returns:
        dict: ``{(q, a): bool}`` where the value is True when ``flag`` is ``1``.
    """
    import re

    # The first argument is matched as a properly terminated single-quoted
    # string (backslash escapes allowed) so the match can never run into a
    # neighbouring entry; optional whitespace is tolerated around separators.
    entry = re.compile(
        r"I\[(\d+)\]\[3\]\[(\d+)\]\s*=\s*new Array\(\s*'(?:[^'\\]|\\.)*'\s*,\s*''\s*,"
        r"\s*(\d)\s*,\s*\d+\s*,\s*\d+\s*\)\s*;"
    )
    flags = {}
    for m in entry.finditer(js_text):
        # Keep the first declaration of a given (q, a) pair.
        flags.setdefault((int(m.group(1)), int(m.group(2))), m.group(3) == '1')
    return flags


def fetch_qcm(session):
    """Download the quiz page at ``URL`` and extract its questions.

    Args:
        session: Object exposing ``get(url, timeout=...)`` whose response has
            ``raise_for_status()`` and ``text`` (a ``requests.Session`` in
            this script).

    Returns:
        list[dict]: One dict per ``li.QuizQuestion`` element with the keys
        ``question`` (inner HTML with absolute image URLs), ``question_text``
        (plain text), ``images`` (absolute image URLs), ``answers`` (list of
        ``{"text", "correct"}``) and ``last_scraped`` (ISO timestamp).

    Raises:
        Whatever ``session.get`` / ``raise_for_status`` raise on HTTP failure.
    """
    import re
    from urllib.parse import urljoin

    logging.info("Récupération du QCM...")
    r = session.get(URL, timeout=10)
    r.raise_for_status()
    soup = BeautifulSoup(r.text, "html.parser")

    # Locate and parse the answer-key script once for the whole page.
    script_tag = soup.find("script", string=lambda t: t and "I[" in t)
    js_text = (script_tag.string or "") if script_tag is not None else ""
    correct_flags = _parse_correct_flags(js_text)

    # The site renders a '?' button in front of each answer; strip it.
    leading_marker = re.compile(r'^[\?\s]+')
    scraped_at = datetime.utcnow().isoformat()

    questions_data = []
    for qi, q in enumerate(soup.select("li.QuizQuestion")):
        qt_elem = q.select_one(".QuestionText")
        question_html = qt_elem.decode_contents() if qt_elem is not None else ""

        # Re-parse the fragment so image sources can be rewritten in place.
        q_soup = BeautifulSoup(question_html, "html.parser")
        images = []
        for img in q_soup.find_all('img'):
            src = img.get('src')
            if src:
                src_abs = urljoin(URL, src)
                img['src'] = src_abs
                images.append(src_abs)

        answers_list = [
            {
                "text": leading_marker.sub('', li.get_text(strip=True)),
                "correct": correct_flags.get((qi, ai), False),
            }
            for ai, li in enumerate(q.select("ol li"))
        ]

        questions_data.append({
            "question": str(q_soup),
            "question_text": q_soup.get_text(" ", strip=True),
            "images": images,
            "answers": answers_list,
            "last_scraped": scraped_at,
        })

    return questions_data


def save_to_db(questions):
    """Insert or update the scraped questions in the ``questions`` table.

    Rows are keyed by the SHA-256 of the normalized question text; an existing
    hash has its question, answers and timestamp overwritten.  The whole batch
    runs in one transaction: it is committed on success and rolled back if any
    statement fails, so a failed scrape never leaves a half-written batch
    waiting for the next commit.

    Args:
        questions: Iterable of dicts as produced by ``fetch_qcm``.
    """
    import json

    # NOTE(review): the hash is computed from the text only, so two questions
    # with identical wording but different images share one row - confirm
    # this is intended.
    rows = []
    for q in questions:
        answers_json = json.dumps(q["answers"], ensure_ascii=False)
        q_norm = normalize_question(q.get("question_text") or q.get("question"))
        qhash = hashlib.sha256(q_norm.encode('utf-8')).hexdigest()
        last_scraped = q.get('last_scraped') or datetime.utcnow().isoformat()
        rows.append((q["question"], qhash, answers_json, last_scraped))

    # The connection context manager commits on success, rolls back on error.
    with conn:
        c.executemany('''
            INSERT INTO questions(question, question_hash, answers, last_scraped)
            VALUES(?, ?, ?, ?)
            ON CONFLICT(question_hash) DO UPDATE SET question=excluded.question, answers=excluded.answers, last_scraped=excluded.last_scraped
        ''', rows)
    logging.info("%d questions sauvegardées / mises à jour dans la DB.", len(rows))


def normalize_question(text):
    """Return a canonical form of *text* used to detect duplicate questions.

    HTML markup is reduced to its text when the input looks like HTML, then
    the text is NFKC-normalized, whitespace runs are collapsed to single
    spaces and the result is lower-cased.

    Args:
        text: Question as plain text or HTML; ``None`` or empty is accepted.

    Returns:
        str: The normalized text, ``''`` for empty input.
    """
    if not text:
        return ''

    if '<' in text and '>' in text:
        try:
            text = BeautifulSoup(text, 'html.parser').get_text(' ', strip=True)
        except Exception:
            # Best effort: fall back to the raw markup, but leave a trace
            # instead of hiding the failure.
            logging.warning("Extraction du texte HTML impossible, texte brut utilisé.", exc_info=True)

    s = unicodedata.normalize('NFKC', text)
    # split()/join already removes leading and trailing whitespace.
    return ' '.join(s.split()).lower()


def shutdown(signum, frame):
    """Signal handler: flush and close the database, then exit with status 0.

    Args:
        signum: Signal number (unused).
        frame: Current stack frame (unused).
    """
    logging.info("Arrêt demandé, fermeture de la DB.")
    try:
        conn.commit()
        conn.close()
    except sqlite3.Error:
        # Still exit cleanly, but record why the close failed.
        logging.exception("Erreur lors de la fermeture de la DB:")
    sys.exit(0)
b/static/css/style.css new file mode 100644 index 0000000..95be767 --- /dev/null +++ b/static/css/style.css @@ -0,0 +1,62 @@ +/* Global reset */ +* { box-sizing: border-box; margin: 0; padding: 0; } +html,body{height:100%;} +:root{ + --bg:#f7f9fb; + --card:#ffffff; + --muted:#707b86; + --accent:#0d6efd; + --success:#198754; + --danger:#dc3545; + --radius:8px; + --max-width:1100px; + --gap:12px; + --font-sans: Inter, Roboto, "Segoe UI", Arial, sans-serif; +} +body{ + font-family: var(--font-sans); + background: var(--bg); + color: #12222b; + -webkit-font-smoothing:antialiased; + -moz-osx-font-smoothing:grayscale; + padding: 24px 16px; +} +.wrapper{max-width:var(--max-width);margin:0 auto;} +.site-header{display:flex;align-items:center;gap:12px;margin-bottom:18px;} +.brand{display:flex;align-items:center;gap:10px} +.brand h1{font-size:1.25rem;margin:0;font-weight:600} +.site-actions{margin-left:auto;display:flex;gap:8px} +.btn{display:inline-flex;align-items:center;gap:8px;background:var(--accent);color:white;border:none;padding:8px 12px;border-radius:6px;text-decoration:none;cursor:pointer} +.btn.secondary{background:#f0f2f5;color:var(--muted);border:1px solid #e1e6ea} +.small-btn{padding:6px 10px;border-radius:6px} +.tabs{display:flex;gap:8px;margin-bottom:12px} +.tab-button{background:transparent;border:1px solid transparent;padding:8px 10px;border-radius:6px;cursor:pointer;color:var(--muted)} +.tab-button.active{background:var(--accent);color:#fff} +.controls{display:flex;gap:8px;align-items:center;margin-bottom:12px} +.container-card{background:var(--card);border-radius:var(--radius);padding:14px;border:1px solid #e9eef2} +.question{margin-bottom:12px;padding:12px;border-radius:10px;background:linear-gradient(180deg,#fff,#fbfdff);border:1px solid #e6eef5} +.question > div:first-child{margin-bottom:8px} +.answers{margin-top:6px;display:flex;flex-direction:column;gap:6px} 
+.answer{display:flex;align-items:center;gap:8px;padding:9px;border-radius:8px;cursor:pointer;border:1px solid transparent} +.answer:hover{background:#f6fbff} +.answer input{margin-right:8px} +.answer.correct{background:#eaf6ec;border-color:rgba(25,135,84,0.15)} +.answer.wrong{background:#fff1f2;border-color:rgba(220,53,69,0.12)} +.feedback{margin-top:8px;font-weight:600} +.feedback .good{color:var(--success)} +.feedback .bad{color:var(--danger)} +.manage-item{padding:10px;border-radius:8px;border:1px solid #f0f3f5;background:linear-gradient(180deg,#fff,#fbfdff);margin-bottom:8px} +.hidden{display:none} +.footer{margin-top:22px;padding:12px;text-align:center;color:var(--muted);font-size:0.9rem} +.iframe-wrap{border-radius:8px;overflow:hidden;border:1px solid #e6eef5;background:white} +.iframe-wrap iframe{width:100%;height:80vh;border:0;display:block} +@media (max-width:800px){ + .tabs{flex-wrap:wrap} + .site-header{flex-direction:column;align-items:flex-start;gap:8px} + .site-actions{margin-left:0} + .iframe-wrap iframe{height:60vh} +} +@media (max-width:420px){ + .brand h1{font-size:1rem} + .btn{padding:8px} +} diff --git a/templates/base.html b/templates/base.html new file mode 100644 index 0000000..d92af95 --- /dev/null +++ b/templates/base.html @@ -0,0 +1,34 @@ + + + + + + QCM - Entraînement + + + + + + +
+
+ {% block content %}{% endblock %} +
+
+ + + diff --git a/templates/external.html b/templates/external.html new file mode 100644 index 0000000..9c6235b --- /dev/null +++ b/templates/external.html @@ -0,0 +1,11 @@ +{% extends "base.html" %} + +{% block content %} +
+ Retour +
Affichage du site externe
+
+
+ +
+{% endblock %} diff --git a/templates/index.html b/templates/index.html new file mode 100644 index 0000000..6f22ac1 --- /dev/null +++ b/templates/index.html @@ -0,0 +1,349 @@ +{% extends "base.html" %} + +{% block content %} +
+ + + +
+ + +
+
+ + + +
+
+
+ {% for q in questions %} +
+
Question {{ loop.index }}: {{ q.text | safe }}
+
+ {% for a in q.answers %} + + {% endfor %} +
+ +
+ {% endfor %} +
+
+ + + + + + + + + + + + +{% endblock %} diff --git a/templates/result.html b/templates/result.html new file mode 100644 index 0000000..e26a08d --- /dev/null +++ b/templates/result.html @@ -0,0 +1,36 @@ +{% extends "base.html" %} + +{% block content %} +
Score: {{ score.correct }} / {{ score.total }}
+ + {% for r in results %} +
+
Question {{ loop.index }}: {{ r.text | safe }}
+
+ {% for a in r.answers %} +
+ {% if a.is_correct %} + (Bonne) + {% endif %} + {% if a.selected and not a.is_correct %} + (Votre choix incorrect) + {% endif %} + {{ a.text }} +
+ {% endfor %} +
+
+ {% if r.question_correct %} + Question correcte + {% else %} + Question incorrecte + {% endif %} +
+
+ {% endfor %} + +
+ Recommencer +
def _nfc(value):
    """Return *value* NFC-normalized when it is a ``str``; pass anything else through."""
    return unicodedata.normalize('NFC', value) if isinstance(value, str) else value


def _repair_mojibake(s):
    """Undo UTF-8 text that was mis-decoded as a single-byte encoding.

    Only lossless round-trips are accepted, so a repair can never inject
    ``?`` or U+FFFD into the text.

    Returns:
        The repaired string, or ``None`` when no strict round-trip succeeds.
    """
    for codec in ('cp1252', 'latin1'):
        try:
            return s.encode(codec).decode('utf-8')
        except UnicodeError:
            continue
    return None


def _decode_text(raw) -> "str | None":
    """Decode a database value (``bytes`` or ``str``) into NFC text.

    Several usual decodings are tried to avoid the replacement character
    U+FFFD and to repair double-encoded UTF-8 (visible as "Ã"/"Â" sequences).
    ``None`` is returned unchanged; other types are converted with ``str()``.
    """
    if raw is None:
        return None

    if isinstance(raw, str):
        if '\ufffd' not in raw and 'Ã' not in raw and 'Â' not in raw:
            return _nfc(raw)
        repaired = _repair_mojibake(raw)
        if repaired is not None and '\ufffd' not in repaired:
            return _nfc(repaired)
        return _nfc(raw)

    if isinstance(raw, (bytes, bytearray)):
        data = bytes(raw)
        try:
            s = data.decode('utf-8')
        except UnicodeDecodeError:
            s = None

        if s is not None:
            # Frequent case: double-encoded UTF-8 shows up as "Ã"/"Â".
            if 'Ã' in s or 'Â' in s:
                repaired = _repair_mojibake(s)
                # Keep the repair only if it produced accented characters.
                if (repaired is not None and '\ufffd' not in repaired
                        and any(ch in repaired for ch in 'éèàêôçùÉÈÀÂ')):
                    return _nfc(repaired)
            if '\ufffd' not in s:
                return _nfc(s)

        # Legacy single-byte data: cp1252 rejects a few undefined bytes,
        # latin1 accepts every byte, so this always yields a string.
        try:
            return _nfc(data.decode('cp1252'))
        except UnicodeDecodeError:
            return _nfc(data.decode('latin1'))

    return _nfc(str(raw))


def get_questions(db_path=None):
    """Load every question from the SQLite database, ordered by id.

    Args:
        db_path: Path of the database file; defaults to the module-level
            ``DB_PATH`` when omitted.

    Returns:
        list[dict]: One dict per row with the keys ``id``, ``text``,
        ``answers`` (list of ``{"idx", "text", "correct"}``) and
        ``last_scraped``.  Rows whose ``answers`` column is missing, is not
        valid JSON or is not a JSON array get an empty answer list.
    """
    conn = sqlite3.connect(DB_PATH if db_path is None else db_path)
    try:
        # Fetch text columns as raw bytes so they can be decoded manually.
        conn.text_factory = bytes
        rows = conn.execute(
            'SELECT id, question, answers, last_scraped FROM questions ORDER BY id'
        ).fetchall()
    finally:
        # Release the connection even when the query fails.
        conn.close()

    questions = []
    for qid, text, answers_json, last_scraped in rows:
        answers_str = _decode_text(answers_json) if answers_json is not None else '[]'
        try:
            answers = json.loads(answers_str)
        except Exception:
            answers = []
        if not isinstance(answers, list):
            # e.g. JSON "null" or an object: nothing usable to iterate over.
            answers = []

        formatted = []
        for i, a in enumerate(answers):
            # Each answer is expected to be a dict with 'text' and 'correct'.
            if isinstance(a, dict):
                answer_text, answer_ok = a.get('text'), a.get('correct')
            else:
                answer_text, answer_ok = str(a), False
            formatted.append({'idx': i, 'text': _decode_text(answer_text), 'correct': bool(answer_ok)})

        questions.append({
            'id': qid,
            'text': _decode_text(text),
            'answers': formatted,
            'last_scraped': _decode_text(last_scraped),
        })

    return questions