Parse entry.summary for any entry

This commit is contained in:
vas3k
2020-01-10 09:34:20 +01:00
parent a06f3cdfa8
commit 0dba3be080

View File

@@ -131,8 +131,18 @@ def refresh_feed(item):
if is_created:
# parse heavy info
text, lead_image = parse_text_and_image(entry)
if text:
article.description = text[:1000]
if lead_image:
article.image = lead_image[:512]
# get real url
real_url, content_type, content_length = resolve_url(entry)
# load and summarize article
if content_length <= MAX_PARSABLE_CONTENT_LENGTH \
and content_type.startswith("text/"): # to not try to parse podcasts :D
@@ -140,14 +150,6 @@ def refresh_feed(item):
article.url = real_url[:2000]
article.domain = parse_domain(real_url)[:256]
text, lead_image = parse_text_and_image(entry)
if text:
article.description = text[:1000]
if lead_image:
article.image = lead_image[:512]
try:
summary, summary_image = load_and_parse_full_article_text_and_image(article.url)
except ArticleException:
@@ -160,7 +162,7 @@ def refresh_feed(item):
if summary_image:
article.image = summary_image[:512]
article.save()
article.save()
week_ago = datetime.utcnow() - timedelta(days=7)
frequency = Article.objects.filter(feed_id=item["id"], created_at__gte=week_ago).count()