diff --git a/boards.yml b/boards.yml
index 917ed82..e75d320 100644
--- a/boards.yml
+++ b/boards.yml
@@ -62,7 +62,7 @@ boards:
- name: MIT Technology Review
rss: https://www.technologyreview.com/topnews.rss
url: https://www.technologyreview.com
- - name: Мейкерство
+ - name: Инди-разработка
slug: make
feeds:
- name: Show HN
diff --git a/boards/migrations/0002_article_summary.py b/boards/migrations/0002_article_summary.py
new file mode 100644
index 0000000..348ad8b
--- /dev/null
+++ b/boards/migrations/0002_article_summary.py
@@ -0,0 +1,18 @@
+# Generated by Django 2.2.8 on 2020-01-05 22:42
+
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+    """Schema migration: add the nullable ``summary`` text field to ``article``."""
+
+    # applied on top of the app's initial migration
+    dependencies = [("boards", "0001_initial")]
+
+    operations = [
+        migrations.AddField(
+            model_name="article",
+            name="summary",
+            field=models.TextField(null=True),
+        ),
+    ]
diff --git a/boards/models.py b/boards/models.py
index 98483d3..21c825c 100644
--- a/boards/models.py
+++ b/boards/models.py
@@ -137,6 +137,7 @@ class Article(models.Model):
title = models.CharField(max_length=256)
image = models.URLField(max_length=512, null=True)
description = models.TextField(null=True)
+ summary = models.TextField(null=True)
created_at = models.DateTimeField(db_index=True)
updated_at = models.DateTimeField()
diff --git a/boards/templatetags/text_filters.py b/boards/templatetags/text_filters.py
index 2b9b789..2f3b383 100755
--- a/boards/templatetags/text_filters.py
+++ b/boards/templatetags/text_filters.py
@@ -15,6 +15,17 @@ def pretty_url(value):
return re.sub(r"https?://(www\.)?", "", value, 1)
+@register.filter(is_safe=True)
+def nl2br(text):
+    """
+    Replaces newline characters with <br> tags; empty input gives an empty string.
+    """
+    if not text:
+        return ""
+    text = text.replace("\n", "<br>")
+    return text
+
+
@register.filter
def cool_number(value, num_decimals=1):
"""
diff --git a/infomate/settings.py b/infomate/settings.py
index ecd4a3e..49c44ee 100644
--- a/infomate/settings.py
+++ b/infomate/settings.py
@@ -88,7 +88,7 @@ CSS_HASH = str(random())
# App settings
APP_NAME = "Infomate"
-APP_TITLE = "Читай то, что читают другие"
+APP_TITLE = "Читай, что читают другие"
APP_DESCRIPTION = ""
APP_HOST = "https://infomate.club"
diff --git a/requirements.txt b/requirements.txt
index a9a51ee..0982401 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -7,3 +7,4 @@ pyyaml==5.2
feedparser==5.2.1
sentry-sdk==0.13.5
pyjwt==1.7.1
+newspaper3k>=0.2.8
diff --git a/scripts/update.py b/scripts/update.py
index d0c67ba..615b90e 100644
--- a/scripts/update.py
+++ b/scripts/update.py
@@ -18,6 +18,8 @@ import requests
import click
import feedparser
from bs4 import BeautifulSoup
+from requests import RequestException
+from newspaper import Article as NewspaperArticle
from boards.models import BoardFeed, Article, Board
@@ -81,7 +83,10 @@ def worker():
if task is None:
break
- refresh_feed(task)
+ try:
+ refresh_feed(task)
+ except Exception:
+ pass # to avoid infinite wait in .join()
queue.task_done()
@@ -106,21 +111,27 @@ def refresh_feed(item):
if is_created:
# parse heavy info
- try:
- real_url = resolve_real_url(entry)
+ real_url = resolve_real_url(entry)
+
+ if real_url:
article.url = real_url[:2000]
article.domain = parse_domain(real_url)[:256]
- except ConnectionError:
- log.warning(f"Failed to resolve real URL: {entry.link}")
- summary, lead_image = parse_entry_text_and_image(entry)
+ text, lead_image = parse_entry_text_and_image(entry)
- if summary:
- article.description = summary[:1000]
+ if text:
+ article.description = text[:1000]
if lead_image:
article.image = lead_image[:512]
+ summary, summary_image = load_and_parse_full_article_text_and_image(article.url)
+
+ article.summary = summary
+
+ if summary_image:
+ article.image = summary_image[:512]
+
article.save()
week_ago = datetime.utcnow() - timedelta(days=7)
@@ -139,11 +150,18 @@ def resolve_real_url(entry):
depth = 10
while depth > 0:
depth -= 1
- r = requests.head(url)
- if 300 < r.status_code < 400:
- url = r.headers["location"]
+
+ try:
+ response = requests.head(url)
+ except RequestException:
+ log.warning(f"Failed to resolve real URL: {entry.link}")
+ return None
+
+ if 300 < response.status_code < 400:
+ url = response.headers["location"]
else:
break
+
return url
@@ -175,7 +193,22 @@ def parse_entry_text_and_image(entry):
def load_and_parse_full_article_text_and_image(url):
- pass
+    """
+    Download the page at `url` and extract its summary and lead image.
+
+    Returns a `(summary, top_image)` tuple taken from the newspaper Article,
+    or `(None, None)` when the page cannot be downloaded, parsed or summarised.
+    """
+    try:
+        article = NewspaperArticle(url)
+        article.download()
+        article.parse()
+        article.nlp()
+    except Exception as ex:
+        # best effort: one unreadable page must not abort the whole feed refresh
+        log.warning(f"Failed to load full article text: {url} ({ex})")
+        return None, None
+    return article.summary, article.top_image
if __name__ == '__main__':
diff --git a/static/css/components.css b/static/css/components.css
index 2b1a32a..b99adb6 100644
--- a/static/css/components.css
+++ b/static/css/components.css
@@ -152,7 +152,7 @@
font-size: 180%;
text-align: center;
border-bottom: solid 2px var(--text-color);
- min-height: 40px;
+ min-height: 45px;
}
.is-block-header-dummy {
@@ -202,7 +202,7 @@
}
.feed {
- padding: 30px 20px 0;
+ padding: 35px 20px 0;
}
.feed-title {
diff --git a/templates/board.html b/templates/board.html
index c665548..199879b 100644
--- a/templates/board.html
+++ b/templates/board.html
@@ -62,7 +62,11 @@
{{ article.title|truncatechars:100 }}
{% if article.description and article.description|length > 20 %}
- {{ article.description|truncatechars:300 }}
+ {% if article.summary %}
+ {{ article.summary|striptags|truncatechars:300|linebreaksbr }}{# truncate before adding line breaks so a tag is never cut in half; linebreaksbr escapes the text itself #}
+ {% else %}
+ {{ article.description|truncatechars:300 }}
+ {% endif %}
{% endif %}
{{ article.natural_created_at }}
diff --git a/templates/index.html b/templates/index.html
index aed1dfd..368a5ed 100644
--- a/templates/index.html
+++ b/templates/index.html
@@ -4,7 +4,10 @@
{% block content %}