First version

Generate RSS feeds from URLs. Generate a basic website. Upload to FTP.
2026-05-08 16:22:00 +02:00 · 2026-05-08 16:22:00 +02:00 · a0e1913939
commit a0e1913939
parent a508ad697e
5 changed files with 222 additions and 0 deletions
--- a/gen_feed.py
+++ b/gen_feed.py
@ -0,0 +1,102 @@
+import requests
+import re
+import datetime
+import xml.dom.minidom
+from bs4 import BeautifulSoup
+from urllib.parse import urlparse
+from zoneinfo import ZoneInfo
+from rfeed import *
+
+
+# Matches the /YYYY/MM/DD/ path segment of a link; groups are year, month, day.
+regex_date = r"/(\d{4})/(\d{2})/(\d{2})/"
+# Matches a "à H:MM" time mention; groups are hour and minute.
+regex_time = r"à (\d+):(\d{2})"
+
+class Article:
+    """One article scraped from a town page.
+
+    The publication date is read from the ``/YYYY/MM/DD/`` segment of the
+    link and the time of day from the article page itself. Both are
+    interpreted as Europe/Paris wall-clock time and stored in UTC as
+    ``self.date``.
+    """
+
+    # Seconds to wait for the article page before giving up.
+    REQUEST_TIMEOUT = 30
+    # (hour, minute) used when the publication time cannot be determined.
+    DEFAULT_TIME = (3, 0)
+
+    def __init__(self, title: str, link: str, is_paid: bool):
+        """Build the article and resolve its publication datetime.
+
+        Fetches ``link`` over HTTP to find the publication time.
+
+        Args:
+            title: article headline.
+            link: absolute URL of the article.
+            is_paid: whether the article is flagged as paid content.
+
+        Raises:
+            ValueError: if ``link`` contains no ``/YYYY/MM/DD/`` segment.
+        """
+        self.title = title
+        self.link = link
+        self.is_paid = is_paid
+        res = re.search(regex_date, link)
+        if res is None:
+            # Fail with a clear message before spending an HTTP request.
+            raise ValueError(f"No /YYYY/MM/DD/ date found in link: {link}")
+        year, month, day = (int(part) for part in res.groups())
+        hour, minute = self._get_time()
+        local_date = datetime.datetime(
+            year, month, day, hour, minute, tzinfo=ZoneInfo("Europe/Paris")
+        )
+        self.date = local_date.astimezone(datetime.timezone.utc)
+
+    def _get_time(self):
+        """Return the ``(hour, minute)`` found on the article page.
+
+        Best effort: any network error, non-200 answer, missing
+        ``span.publish`` element or unparsable/out-of-range time falls back
+        to ``DEFAULT_TIME`` (3:00 AM) instead of raising.
+        """
+        print(f"   Retrieving {self.link} to get pub time...")
+        try:
+            response = requests.get(self.link, timeout=self.REQUEST_TIMEOUT)
+        except requests.RequestException as err:
+            print(f"   Failed to get it ({err}, defaulting to 3AM)")
+            return self.DEFAULT_TIME
+        if response.status_code != 200:
+            print(f"   Failed to get it ({response.status_code}, defaulting to 3AM)")
+            return self.DEFAULT_TIME
+        soup = BeautifulSoup(response.text, 'html.parser')
+        publish = soup.find("span", class_="publish")
+        # The span may be absent; treat that like an unparsable time.
+        res = re.search(regex_time, publish.text) if publish is not None else None
+        if not res:
+            print("   Failed to parse it, defaulting to 3AM")
+            return self.DEFAULT_TIME
+        hour, minute = int(res.group(1)), int(res.group(2))
+        if hour > 23 or minute > 59:
+            # regex_time accepts any digits; reject values datetime would refuse.
+            print("   Failed to parse it, defaulting to 3AM")
+            return self.DEFAULT_TIME
+        return (hour, minute)
+
+    def full_title(self):
+        """Return the title, prefixed with "[€] " when the article is paid."""
+        prefix = "[€] " if self.is_paid else ""
+        return f'{prefix}{self.title}'
+
+    def __str__(self):
+        """Return the full title followed by the publication date in brackets."""
+        return f'{self.full_title()} [{self.date}]'
+
+    def __repr__(self):
+        return self.__str__()
+
+
+def generate_feed(town_url, feed_url, feed_path):
+    """Scrape a town page and write its articles to an RSS file.
+
+    Args:
+        town_url: URL of the town page listing the articles.
+        feed_url: public URL of the feed, used as the feed's link.
+        feed_path: local path the RSS XML is written to.
+
+    Returns:
+        True when the feed was written, False when the town page could not
+        be retrieved.
+    """
+    main_domain = urlparse(town_url).netloc
+
+    print(f"Retrieving {town_url}...")
+    try:
+        response = requests.get(town_url, timeout=30)
+    except requests.RequestException as err:
+        # Network failures follow the same "return False" path as a bad status.
+        print(f"Failed to get url: {err}")
+        return False
+    if response.status_code != 200:
+        print(f"Failed to get url: {response.status_code}")
+        return False
+
+    soup = BeautifulSoup(response.text, 'html.parser')
+
+    small_articles = []
+    for list_id in ("ListUneMain", "ListUneSecondary"):
+        container = soup.find(id=list_id)
+        # A page may lack one of the two lists; treat it as empty.
+        if container is not None:
+            small_articles.extend(container.find_all("div", class_="wrapperClickArticle"))
+    print(f"Found {len(small_articles)} articles...")
+
+    articles = []
+    for small_article in small_articles:
+        spans = small_article.find_all("span")
+        anchor = small_article.find("a")
+        href = anchor.get("href") if anchor is not None else None
+        if len(spans) < 2 or not href:
+            # One odd card must not abort the whole feed.
+            print("   Skipping an article with unexpected markup")
+            continue
+        if href.startswith(("http://", "https://")):
+            link = href
+        else:
+            link = "https://" + main_domain + href
+        if not re.search(regex_date, link):
+            # Article needs the /YYYY/MM/DD/ segment to compute the pubDate.
+            print(f"   Skipping {link}: no publication date in link")
+            continue
+        is_paid = bool(small_article.find_all("span", class_="flagPaid"))
+        articles.append(Article(spans[1].text, link, is_paid))
+
+    print("Generating feed...")
+    items = [
+        Item(title=a.full_title(), link=a.link, guid=Guid(a.link), pubDate=a.date)
+        for a in articles
+    ]
+    # Fall back to the page URL when the document has no usable <title>.
+    if soup.title is not None and soup.title.string:
+        page_title = soup.title.string
+    else:
+        page_title = town_url
+    feed = Feed(title=page_title,
+        link=feed_url,
+        description=page_title + " - Derniers articles",
+        language="fr-FR",
+        lastBuildDate=datetime.datetime.now(datetime.timezone.utc),
+        items=items)
+
+    print("Writing feed...")
+    # Serialise before opening the file so a failure cannot leave it truncated.
+    pretty_xml = xml.dom.minidom.parseString(feed.rss()).toprettyxml()
+    # The XML declaration carries no encoding, so readers assume UTF-8.
+    with open(feed_path, 'w', encoding="utf-8") as out_rss:
+        out_rss.write(pretty_xml)
+    print("All done!")
+    return True
+
+
+if __name__ == '__main__':
+    # Manual run for a single town. The original assigned town_url three
+    # times; only the last value was ever used. The overwritten values are
+    # kept here as alternative inputs:
+    #   https://www.leprogres.fr/c/rhone/69163-quincieux
+    #   https://www.ledauphine.com/c/isere/38425-saint-maurice-l-exil
+    town_url = "https://www.ledauphine.com/c/isere/38124-corbelin"
+    feed_url = "https://www.onnula.fr/infos/38124-corbelin.xml"
+    feed_path = "38124-corbelin.xml"
+    generate_feed(town_url, feed_url, feed_path)
--- a/gen_website.py
+++ b/gen_website.py
@ -0,0 +1,79 @@
+import json
+import locale
+import paramiko
+import os
+from datetime import datetime
+from gen_feed import generate_feed
+from dotenv import load_dotenv
+
+def get_publisher(link):
+    """Return the newspaper name for ``link``, or "" when none is recognised.
+
+    Domains are tested as substrings of ``link`` in the order listed; the
+    first one found wins.
+    """
+    publishers_by_domain = (
+        ("ledauphine.com", "Le Dauphiné Libéré"),
+        ("bienpublic.com", "Le Bien Public"),
+        ("leprogres.fr", "Le Progrès"),
+    )
+    matches = (name for domain, name in publishers_by_domain if domain in link)
+    return next(matches, "")
+
+
+def upload_to_sftp(towns):
+    """Upload index.html and every town's feed file over SFTP.
+
+    Connection settings are read from the FTP_HOST, FTP_PORT, FTP_USER and
+    FTP_PASSWORD environment variables.
+
+    Args:
+        towns: iterable of tuples whose element at index 3 is the local feed
+            file name; each file is uploaded under the same name in ``www/``.
+
+    Raises:
+        KeyError: if one of the environment variables is missing.
+        RuntimeError: if the SFTP session cannot be opened on the transport.
+    """
+    host = os.environ["FTP_HOST"]
+    port = int(os.environ["FTP_PORT"])
+    username = os.environ["FTP_USER"]
+    password = os.environ["FTP_PASSWORD"]
+
+    transport = paramiko.Transport((host, port))
+    try:
+        transport.connect(username=username, password=password)
+
+        sftp = paramiko.SFTPClient.from_transport(transport)
+        if sftp is None:
+            raise RuntimeError("Could not open an SFTP session")
+        try:
+            sftp.put("index.html", "www/index.html")
+            for t in towns:
+                sftp.put(t[3], f"www/{t[3]}")
+        finally:
+            sftp.close()
+    finally:
+        # Always release the connection, even when connecting or an upload fails.
+        transport.close()
+
+
+def generate_web():
+    """Generate every feed, render index.html from the template and upload.
+
+    Reads the comma-separated town page URLs from the URLS environment
+    variable and the public base URL of the site from WEBSITE. Each feed is
+    written to ``<town_id>.xml`` in the current directory, where ``town_id``
+    is the last path segment of the town URL.
+
+    Returns:
+        True on success, False as soon as one feed cannot be generated.
+
+    Raises:
+        KeyError: if URLS or WEBSITE is not set.
+    """
+    from html import escape
+
+    # Load from env
+    # Tolerate spaces around the commas and empty entries (e.g. trailing comma).
+    urls = [u.strip() for u in os.environ["URLS"].split(",") if u.strip()]
+    # Avoid a double slash in feed URLs when WEBSITE ends with "/".
+    website = os.environ["WEBSITE"].rstrip("/")
+
+    towns = []
+    print("Getting articles for each link...")
+    for link in urls:
+        # A trailing slash would otherwise yield an empty id and a ".xml" file.
+        town_id = link.rstrip('/').rsplit('/', 1)[-1]
+        town_xml = town_id + ".xml"
+        town_rss = website + "/" + town_xml
+        if not generate_feed(link, town_rss, town_xml):
+            print(f"Failed to generate feed for {town_id}")
+            return False
+        towns.append((town_id, town_rss, get_publisher(link), town_xml))
+
+    print("Updating HTML template...")
+    with open("template.html", 'r', encoding="utf-8") as template:
+        data = template.read()
+
+    # Escape the values so characters such as "&" or quotes keep the markup valid.
+    li = "".join(
+        f'<li><a href="{escape(town[1])}">{escape(town[0])}</a> ({escape(town[2])})</li>'
+        for town in towns
+    )
+    data = data.replace("$$$TOWN_LIST$$$", li)
+
+    try:
+        locale.setlocale(locale.LC_ALL, 'fr_FR.UTF-8')
+    except locale.Error:
+        # The locale only affects day/month names; do not lose the whole run.
+        print("Locale fr_FR.UTF-8 is not available, keeping the current locale")
+    now = datetime.now()
+    data = data.replace("$$$GENERATED_DATETIME$$$", now.strftime("%H:%M le %A %d %B %Y"))
+    with open("index.html", 'w', encoding="utf-8") as out:
+        out.write(data)
+
+    print("Uploading to FTP...")
+    upload_to_sftp(towns)
+
+    print("All done!")
+    return True
+
+
+if __name__ == "__main__":
+    # Script entry point: load_dotenv() runs before generate_web(), which
+    # reads its settings from os.environ.
+    load_dotenv()
+    generate_web()
--- a/requirements.txt
+++ b/requirements.txt
@ -0,0 +1,5 @@
+rfeed
+beautifulsoup4
+requests
+paramiko
+python-dotenv
--- a/template.env
+++ b/template.env
@ -0,0 +1,6 @@
+FTP_HOST=
+FTP_PORT=
+FTP_USER=
+FTP_PASSWORD=
+WEBSITE=
+URLS=
--- a/template.html
+++ b/template.html
@ -0,0 +1,30 @@
+<!DOCTYPE html>
+<html>
+    <head>
+        <title>Flux RSS pour villes et villages couverts par le groupe EBRA</title>
+        <meta http-equiv="Content-Type" content="text/html; charset=utf-8">
+        <style>
+            body {
+                background-color: rgb(245, 238, 220);
+                text-align: center;
+                font-family: Arial, Helvetica, sans-serif;
+            }
+            footer {
+                font-size: small;
+                color: rgb(65, 65, 65);
+            }
+            ul {
+                list-style-type: none;
+                padding: 0;
+            }
+        </style>
+    </head>
+    <body>
+        <h1>Flux RSS</h1>
+        <h2>Listes des villes couvertes par cette instance</h2>
+        <ul>
+            $$$TOWN_LIST$$$
+        </ul>
+        <footer>Généré à $$$GENERATED_DATETIME$$$ - <a href="https://onnula.fr/forgejo/cyril/RSSEbra">Source</a></footer>
+    </body>
+</html>