diff --git a/gen_feed.py b/gen_feed.py index dfeabf1..23bcf06 100644 --- a/gen_feed.py +++ b/gen_feed.py @@ -16,19 +16,30 @@ class Article: self.title = title self.link = link res = re.search(regex_date, link) - h_m = self._get_time() + self.text = "" + h_m = self._get_time_and_text() self.date = datetime.datetime(int(res.group(1)), int(res.group(2)), int(res.group(3)), h_m[0], h_m[1]) self.date = self.date.replace(tzinfo=ZoneInfo("Europe/Paris")) self.date = self.date.astimezone(datetime.timezone.utc) self.is_paid = is_paid - def _get_time(self): + def _get_time_and_text(self): response = requests.get(self.link) print(f" Retrieving {self.link} to get pub time...") if response.status_code != 200: print(f" Failed to get it ({response.status_code}, defaulting to 3AM)") return (3, 0) # Default to 3:00 AM soup = BeautifulSoup(response.text, 'html.parser') + + # Handle text + text = [] + if soup.find("div", class_="chapo") is not None: + text.append(soup.find("div", class_="chapo").text) + paragraphs = soup.find_all("div", class_="textComponent") + for p in paragraphs: + text.append(p.text) + self.text = "\n".join(text) + publish = soup.find("span", class_="publish").text res = re.search(regex_time, publish) if not res: @@ -76,7 +87,7 @@ def generate_feed(town_url, feed_url, feed_path): print("Generating feed...") items = [] for a in articles: - item = Item(title = a.full_title(), link= a.link, guid = Guid(a.link), pubDate = a.date) + item = Item(title = a.full_title(), link= a.link, guid = Guid(a.link), pubDate = a.date, description = a.text) items.append(item) feed = Feed(title = soup.title.string, link = feed_url,