Add is_parsable flag to some feeds

This commit is contained in:
Vasily Zubarev
2020-02-07 13:57:23 +01:00
parent e178f9ff2e
commit ad72fa5769
5 changed files with 34 additions and 10 deletions

View File

@@ -0,0 +1,18 @@
# Generated by Django 2.2.8 on 2020-02-07
from django.db import migrations, models
class Migration(migrations.Migration):
    """Schema migration for the ``boards`` app.

    Adds a boolean ``is_parsable`` field (default ``True``) to the
    ``boardfeed`` model. Must run after ``0007_auto_20200122_1526``.
    """

    dependencies = [("boards", "0007_auto_20200122_1526")]

    operations = [
        # New rows and all pre-existing rows get is_parsable=True.
        migrations.AddField(
            model_name="boardfeed",
            name="is_parsable",
            field=models.BooleanField(default=True),
        ),
    ]

View File

@@ -103,6 +103,7 @@ class BoardFeed(models.Model):
index = models.PositiveIntegerField(default=0)
conditions = JSONField(null=True)
is_parsable = models.BooleanField(default=True)
class Meta:
db_table = "board_feeds"

View File

@@ -22,8 +22,9 @@ from scripts.common import DEFAULT_REQUEST_HEADERS
@click.command()
@click.option("--config", default="boards.yml", help="Boards YAML file")
@click.option("--board-slug", default=None, help="Board slug to parse only one exact board")
@click.option("--upload-favicons/--no-upload-favicons", default=True, help="Upload favicons")
def initialize(config, board_slug, upload_favicons):
@click.option("--upload-favicons/--no-upload-favicons", default=False, help="Upload favicons")
@click.option("-y", "always_yes", is_flag=True, help="Don't ask any questions (good for scripts)")
def initialize(config, board_slug, upload_favicons, always_yes):
yaml_file = os.path.join(BASE_DIR, config)
with open(yaml_file) as f:
try:
@@ -32,7 +33,8 @@ def initialize(config, board_slug, upload_favicons):
print(f"Bad YAML file '{yaml_file}': {ex}")
exit(1)
input(f"Initializing feeds from {yaml_file}. Press Enter to continue...")
if not always_yes:
input(f"Initializing feeds from {yaml_file}. Press Enter to continue...")
for board_index, board_config in enumerate(config.get("boards") or []):
if board_slug and board_config["slug"] != board_slug:
@@ -106,7 +108,8 @@ def initialize(config, board_slug, upload_favicons):
icon=feed_config.get("icon"),
index=feed_index,
columns=feed_config.get("columns") or 1,
conditions=feed_config.get("conditions")
conditions=feed_config.get("conditions"),
# NOTE: `value or True` is always True, which silently ignored an explicit
# `is_parsable: false` in the YAML. Default to True only when the key is absent.
is_parsable=bool(feed_config.get("is_parsable", True))
)
)
@@ -117,6 +120,7 @@ def initialize(config, board_slug, upload_favicons):
feed.index = feed_index
feed.columns = feed_config.get("columns") or 1
feed.conditions = feed_config.get("conditions")
# NOTE: `value or True` is always True, which silently ignored an explicit
# `is_parsable: false` in the YAML. Default to True only when the key is absent.
feed.is_parsable = bool(feed_config.get("is_parsable", True))
html = None

View File

@@ -58,6 +58,7 @@ def update(num_workers, force, feed):
"name": feed.name,
"rss": feed.rss,
"conditions": feed.conditions,
"is_parsable": feed.is_parsable,
})
threads = []
@@ -130,14 +131,14 @@ def refresh_feed(item):
created_at=parse_datetime(entry),
updated_at=datetime.utcnow(),
title=entry_title[:256],
image=str(parse_image(entry) or "")[:512],
image=str(parse_rss_image(entry) or "")[:512],
description=entry.get("summary"),
)
)
if is_created:
# parse heavy info
text, lead_image = parse_text_and_image(entry)
text, lead_image = parse_rss_text_and_image(entry)
if text:
article.description = text[:1000]
@@ -149,7 +150,7 @@ def refresh_feed(item):
real_url, content_type, content_length = resolve_url(entry_link)
# load and summarize article
if content_length <= MAX_PARSABLE_CONTENT_LENGTH \
if item["is_parsable"] and content_length <= MAX_PARSABLE_CONTENT_LENGTH \
and content_type.startswith("text/"): # to not try to parse podcasts :D
if real_url:
@@ -246,7 +247,7 @@ def parse_link(entry):
return None
def parse_image(entry):
def parse_rss_image(entry):
if entry.get("media_content"):
images = [m["url"] for m in entry["media_content"] if m.get("medium") == "image" and m.get("url")]
if images:
@@ -260,7 +261,7 @@ def parse_image(entry):
return None
def parse_text_and_image(entry):
def parse_rss_text_and_image(entry):
if not entry.get("summary"):
return "", ""

View File

@@ -78,7 +78,7 @@
{% if article.description or article.summary %}
<span class="article-tooltip-description">
{% if article.summary %}
{% if feed.is_parsable and article.summary %}
{{ article.summary|striptags|truncatechars:700|escape|nl2p|safe }}
{% else %}
{{ article.description|striptags|truncatechars:700|escape|nl2p|safe }}