Add is_parsable flag to some feeds
This commit is contained in:
18
boards/migrations/0008_boardfeed_is_parsable.py
Normal file
18
boards/migrations/0008_boardfeed_is_parsable.py
Normal file
@@ -0,0 +1,18 @@
|
||||
# Generated by Django 2.2.8 on 2020-02-07
|
||||
|
||||
from django.db import migrations, models
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
    """Add the boolean ``is_parsable`` field to the ``boardfeed`` model.

    The new field is declared with ``default=True``.
    """

    # Runs after the previous migration of the ``boards`` app.
    dependencies = [
        ('boards', '0007_auto_20200122_1526'),
    ]

    # Single schema change: one new BooleanField on ``boardfeed``.
    operations = [
        migrations.AddField(
            model_name='boardfeed',
            name='is_parsable',
            field=models.BooleanField(default=True),
        ),
    ]
|
||||
@@ -103,6 +103,7 @@ class BoardFeed(models.Model):
|
||||
index = models.PositiveIntegerField(default=0)
|
||||
|
||||
conditions = JSONField(null=True)
|
||||
is_parsable = models.BooleanField(default=True)
|
||||
|
||||
class Meta:
|
||||
db_table = "board_feeds"
|
||||
|
||||
@@ -22,8 +22,9 @@ from scripts.common import DEFAULT_REQUEST_HEADERS
|
||||
@click.command()
|
||||
@click.option("--config", default="boards.yml", help="Boards YAML file")
|
||||
@click.option("--board-slug", default=None, help="Board slug to parse only one exact board")
|
||||
@click.option("--upload-favicons/--no-upload-favicons", default=True, help="Upload favicons")
|
||||
def initialize(config, board_slug, upload_favicons):
|
||||
@click.option("--upload-favicons/--no-upload-favicons", default=False, help="Upload favicons")
|
||||
@click.option("-y", "always_yes", is_flag=True, help="Don't ask any questions (good for scripts)")
|
||||
def initialize(config, board_slug, upload_favicons, always_yes):
|
||||
yaml_file = os.path.join(BASE_DIR, config)
|
||||
with open(yaml_file) as f:
|
||||
try:
|
||||
@@ -32,7 +33,8 @@ def initialize(config, board_slug, upload_favicons):
|
||||
print(f"Bad YAML file '{yaml_file}': {ex}")
|
||||
exit(1)
|
||||
|
||||
input(f"Initializing feeds from {yaml_file}. Press Enter to continue...")
|
||||
if not always_yes:
|
||||
input(f"Initializing feeds from {yaml_file}. Press Enter to continue...")
|
||||
|
||||
for board_index, board_config in enumerate(config.get("boards") or []):
|
||||
if board_slug and board_config["slug"] != board_slug:
|
||||
@@ -106,7 +108,8 @@ def initialize(config, board_slug, upload_favicons):
|
||||
icon=feed_config.get("icon"),
|
||||
index=feed_index,
|
||||
columns=feed_config.get("columns") or 1,
|
||||
conditions=feed_config.get("conditions")
|
||||
conditions=feed_config.get("conditions"),
|
||||
# Only an explicit `is_parsable: false` disables parsing; a missing or null key
# keeps the default of True. (`... or True` evaluated to True for every value.)
is_parsable=feed_config.get("is_parsable") is not False
|
||||
)
|
||||
)
|
||||
|
||||
@@ -117,6 +120,7 @@ def initialize(config, board_slug, upload_favicons):
|
||||
feed.index = feed_index
|
||||
feed.columns = feed_config.get("columns") or 1
|
||||
feed.conditions = feed_config.get("conditions")
|
||||
# Only an explicit `is_parsable: false` disables parsing; a missing or null key
# keeps the default of True. (`... or True` evaluated to True for every value.)
feed.is_parsable = feed_config.get("is_parsable") is not False
|
||||
|
||||
html = None
|
||||
|
||||
|
||||
@@ -58,6 +58,7 @@ def update(num_workers, force, feed):
|
||||
"name": feed.name,
|
||||
"rss": feed.rss,
|
||||
"conditions": feed.conditions,
|
||||
"is_parsable": feed.is_parsable,
|
||||
})
|
||||
|
||||
threads = []
|
||||
@@ -130,14 +131,14 @@ def refresh_feed(item):
|
||||
created_at=parse_datetime(entry),
|
||||
updated_at=datetime.utcnow(),
|
||||
title=entry_title[:256],
|
||||
image=str(parse_image(entry) or "")[:512],
|
||||
image=str(parse_rss_image(entry) or "")[:512],
|
||||
description=entry.get("summary"),
|
||||
)
|
||||
)
|
||||
|
||||
if is_created:
|
||||
# parse heavy info
|
||||
text, lead_image = parse_text_and_image(entry)
|
||||
text, lead_image = parse_rss_text_and_image(entry)
|
||||
|
||||
if text:
|
||||
article.description = text[:1000]
|
||||
@@ -149,7 +150,7 @@ def refresh_feed(item):
|
||||
real_url, content_type, content_length = resolve_url(entry_link)
|
||||
|
||||
# load and summarize article
|
||||
if content_length <= MAX_PARSABLE_CONTENT_LENGTH \
|
||||
if item["is_parsable"] and content_length <= MAX_PARSABLE_CONTENT_LENGTH \
|
||||
and content_type.startswith("text/"): # to not try to parse podcasts :D
|
||||
|
||||
if real_url:
|
||||
@@ -246,7 +247,7 @@ def parse_link(entry):
|
||||
return None
|
||||
|
||||
|
||||
def parse_image(entry):
|
||||
def parse_rss_image(entry):
|
||||
if entry.get("media_content"):
|
||||
images = [m["url"] for m in entry["media_content"] if m.get("medium") == "image" and m.get("url")]
|
||||
if images:
|
||||
@@ -260,7 +261,7 @@ def parse_image(entry):
|
||||
return None
|
||||
|
||||
|
||||
def parse_text_and_image(entry):
|
||||
def parse_rss_text_and_image(entry):
|
||||
if not entry.get("summary"):
|
||||
return "", ""
|
||||
|
||||
|
||||
@@ -78,7 +78,7 @@
|
||||
|
||||
{% if article.description or article.summary %}
|
||||
<span class="article-tooltip-description">
|
||||
{% if article.summary %}
|
||||
{% if feed.is_parsable and article.summary %}
|
||||
{{ article.summary|striptags|truncatechars:700|escape|nl2p|safe }}
|
||||
{% else %}
|
||||
{{ article.description|striptags|truncatechars:700|escape|nl2p|safe }}
|
||||
|
||||
Reference in New Issue
Block a user