From ad72fa57691324d276b530e3f32c87f889a39d0a Mon Sep 17 00:00:00 2001 From: Vasily Zubarev Date: Fri, 7 Feb 2020 13:57:23 +0100 Subject: [PATCH] Add is_parsable flag to some feeds --- .../migrations/0008_boardfeed_is_parsable.py | 18 ++++++++++++++++++ boards/models.py | 1 + scripts/initialize.py | 12 ++++++++---- scripts/update.py | 11 ++++++----- templates/board.html | 2 +- 5 files changed, 34 insertions(+), 10 deletions(-) create mode 100644 boards/migrations/0008_boardfeed_is_parsable.py diff --git a/boards/migrations/0008_boardfeed_is_parsable.py b/boards/migrations/0008_boardfeed_is_parsable.py new file mode 100644 index 0000000..8275df9 --- /dev/null +++ b/boards/migrations/0008_boardfeed_is_parsable.py @@ -0,0 +1,18 @@ +# Generated by Django 2.2.8 on 2020-02-07 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('boards', '0007_auto_20200122_1526'), + ] + + operations = [ + migrations.AddField( + model_name='boardfeed', + name='is_parsable', + field=models.BooleanField(default=True), + ), + ] diff --git a/boards/models.py b/boards/models.py index b609154..9380c82 100644 --- a/boards/models.py +++ b/boards/models.py @@ -103,6 +103,7 @@ class BoardFeed(models.Model): index = models.PositiveIntegerField(default=0) conditions = JSONField(null=True) + is_parsable = models.BooleanField(default=True) class Meta: db_table = "board_feeds" diff --git a/scripts/initialize.py b/scripts/initialize.py index 808f73a..154762a 100644 --- a/scripts/initialize.py +++ b/scripts/initialize.py @@ -22,8 +22,9 @@ from scripts.common import DEFAULT_REQUEST_HEADERS @click.command() @click.option("--config", default="boards.yml", help="Boards YAML file") @click.option("--board-slug", default=None, help="Board slug to parse only one exact board") -@click.option("--upload-favicons/--no-upload-favicons", default=True, help="Upload favicons") -def initialize(config, board_slug, upload_favicons): 
+@click.option("--upload-favicons/--no-upload-favicons", default=False, help="Upload favicons") +@click.option("-y", "always_yes", is_flag=True, help="Don't ask any questions (good for scripts)") +def initialize(config, board_slug, upload_favicons, always_yes): yaml_file = os.path.join(BASE_DIR, config) with open(yaml_file) as f: try: @@ -32,7 +33,8 @@ def initialize(config, board_slug, upload_favicons): print(f"Bad YAML file '{yaml_file}': {ex}") exit(1) - input(f"Initializing feeds from {yaml_file}. Press Enter to continue...") + if not always_yes: + input(f"Initializing feeds from {yaml_file}. Press Enter to continue...") for board_index, board_config in enumerate(config.get("boards") or []): if board_slug and board_config["slug"] != board_slug: @@ -106,7 +108,8 @@ def initialize(config, board_slug, upload_favicons): icon=feed_config.get("icon"), index=feed_index, columns=feed_config.get("columns") or 1, - conditions=feed_config.get("conditions") + conditions=feed_config.get("conditions"), + is_parsable=feed_config.get("is_parsable", True) ) ) @@ -117,6 +120,7 @@ def initialize(config, board_slug, upload_favicons): feed.index = feed_index feed.columns = feed_config.get("columns") or 1 feed.conditions = feed_config.get("conditions") + feed.is_parsable = feed_config.get("is_parsable", True) html = None diff --git a/scripts/update.py b/scripts/update.py index 4e388de..f7b293f 100644 --- a/scripts/update.py +++ b/scripts/update.py @@ -58,6 +58,7 @@ def update(num_workers, force, feed): "name": feed.name, "rss": feed.rss, "conditions": feed.conditions, + "is_parsable": feed.is_parsable, }) threads = [] @@ -130,14 +131,14 @@ def refresh_feed(item): created_at=parse_datetime(entry), updated_at=datetime.utcnow(), title=entry_title[:256], - image=str(parse_image(entry) or "")[:512], + image=str(parse_rss_image(entry) or "")[:512], description=entry.get("summary"), ) ) if is_created: # parse heavy info - text, 
lead_image = parse_rss_text_and_image(entry) if text: article.description = text[:1000] @@ -149,7 +150,7 @@ def refresh_feed(item): real_url, content_type, content_length = resolve_url(entry_link) # load and summarize article - if content_length <= MAX_PARSABLE_CONTENT_LENGTH \ + if item["is_parsable"] and content_length <= MAX_PARSABLE_CONTENT_LENGTH \ and content_type.startswith("text/"): # to not try to parse podcasts :D if real_url: @@ -246,7 +247,7 @@ def parse_link(entry): return None -def parse_image(entry): +def parse_rss_image(entry): if entry.get("media_content"): images = [m["url"] for m in entry["media_content"] if m.get("medium") == "image" and m.get("url")] if images: @@ -260,7 +261,7 @@ def parse_image(entry): return None -def parse_text_and_image(entry): +def parse_rss_text_and_image(entry): if not entry.get("summary"): return "", "" diff --git a/templates/board.html b/templates/board.html index 04fbdd9..3e602fa 100644 --- a/templates/board.html +++ b/templates/board.html @@ -78,7 +78,7 @@ {% if article.description or article.summary %} - {% if article.summary %} + {% if feed.is_parsable and article.summary %} {{ article.summary|striptags|truncatechars:700|escape|nl2p|safe }} {% else %} {{ article.description|striptags|truncatechars:700|escape|nl2p|safe }}