Add possible description in RSS item
This commit is contained in:
parent
a0e1913939
commit
3b217a6501
1 changed file with 14 additions and 3 deletions
17
gen_feed.py
17
gen_feed.py
|
|
@@ -16,19 +16,30 @@ class Article:
|
|||
self.title = title
|
||||
self.link = link
|
||||
res = re.search(regex_date, link)
|
||||
h_m = self._get_time()
|
||||
self.text = ""
|
||||
h_m = self._get_time_and_text()
|
||||
self.date = datetime.datetime(int(res.group(1)), int(res.group(2)), int(res.group(3)), h_m[0], h_m[1])
|
||||
self.date = self.date.replace(tzinfo=ZoneInfo("Europe/Paris"))
|
||||
self.date = self.date.astimezone(datetime.timezone.utc)
|
||||
self.is_paid = is_paid
|
||||
|
||||
def _get_time(self):
|
||||
def _get_time_and_text(self):
|
||||
response = requests.get(self.link)
|
||||
print(f" Retrieving {self.link} to get pub time...")
|
||||
if response.status_code != 200:
|
||||
print(f" Failed to get it ({response.status_code}, defaulting to 3AM)")
|
||||
return (3, 0) # Default to 3:00 AM
|
||||
soup = BeautifulSoup(response.text, 'html.parser')
|
||||
|
||||
# Handle text
|
||||
text = []
|
||||
if soup.find("div", class_="chapo") is not None:
|
||||
text.append(soup.find("div", class_="chapo").text)
|
||||
paragraphs = soup.find_all("div", class_="textComponent")
|
||||
for p in paragraphs:
|
||||
text.append(p.text)
|
||||
self.text = "\n".join(text)
|
||||
|
||||
publish = soup.find("span", class_="publish").text
|
||||
res = re.search(regex_time, publish)
|
||||
if not res:
|
||||
|
|
@@ -76,7 +87,7 @@ def generate_feed(town_url, feed_url, feed_path):
|
|||
print("Generating feed...")
|
||||
items = []
|
||||
for a in articles:
|
||||
item = Item(title = a.full_title(), link= a.link, guid = Guid(a.link), pubDate = a.date)
|
||||
item = Item(title = a.full_title(), link= a.link, guid = Guid(a.link), pubDate = a.date, description = a.text)
|
||||
items.append(item)
|
||||
feed = Feed(title = soup.title.string,
|
||||
link = feed_url,
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue