Conditional feed parsing

This commit is contained in:
vas3k
2020-01-19 19:27:17 +01:00
parent acdd16e1de
commit 80ca0ad5e4
5 changed files with 52 additions and 2 deletions

View File

@@ -17,9 +17,13 @@ boards:
- name: На русском
slug: ru
feeds:
- name: VC
- name: "vc.ru: Технологии"
url: https://vc.ru
rss: https://vc.ru/rss/all
conditions:
- type: in
field: link
in: "https://vc.ru/tech/"
- name: TJ
url: https://tjournal.ru
rss: https://tjournal.ru/rss/all

View File

@@ -0,0 +1,19 @@
# Generated by Django 2.2.8 on 2020-01-19 18:20
import django.contrib.postgres.fields.jsonb
from django.db import migrations
class Migration(migrations.Migration):
    """Add the nullable ``conditions`` JSON column to the ``boardfeed`` model."""

    # Must run after the previous migration of the ``boards`` app.
    dependencies = [
        ("boards", "0005_auto_20200119_1554"),
    ]

    operations = [
        migrations.AddField(
            model_name="boardfeed",
            name="conditions",
            # null=True: the column accepts NULL, so no default is declared.
            field=django.contrib.postgres.fields.jsonb.JSONField(null=True),
        ),
    ]

View File

@@ -3,6 +3,7 @@ from datetime import datetime, timedelta
from django.contrib.humanize.templatetags.humanize import naturaltime
from django.db import models
from django.contrib.postgres.fields import JSONField
from slugify import slugify
from boards.icons import DOMAIN_ICONS
@@ -101,6 +102,8 @@ class BoardFeed(models.Model):
articles_per_column = models.SmallIntegerField(default=15)
index = models.PositiveIntegerField(default=0)
conditions = JSONField(null=True)
class Meta:
db_table = "board_feeds"
ordering = ["index"]

View File

@@ -92,6 +92,7 @@ def initialize(config, board_slug, upload_favicons):
for feed_index, feed_config in enumerate(block_config.get("feeds") or []):
feed_name = feed_config.get("name")
feed_url = feed_config["url"]
print(f"Creating or updating feed: {feed_name}...")
feed, is_created = BoardFeed.objects.get_or_create(
@@ -105,6 +106,7 @@ def initialize(config, board_slug, upload_favicons):
icon=feed_config.get("icon"),
index=feed_index,
columns=feed_config.get("columns") or 1,
conditions=feed_config.get("conditions")
)
)
@@ -113,6 +115,7 @@ def initialize(config, board_slug, upload_favicons):
feed.comment = feed_config.get("comment")
feed.index = feed_index
feed.columns = feed_config.get("columns") or 1
feed.conditions = feed_config.get("conditions")
html = None

View File

@@ -63,7 +63,8 @@ def update(num_workers, force, feed):
"id": feed.id,
"board_id": feed.board_id,
"name": feed.name,
"rss": feed.rss
"rss": feed.rss,
"conditions": feed.conditions,
})
threads = []
@@ -114,6 +115,14 @@ def refresh_feed(item):
continue
print(f"- article: '{entry_title}' {entry.link}")
conditions = item.get("conditions")
if conditions:
is_valid = check_conditions(conditions, entry)
if not is_valid:
print(f"Condition {conditions} does not match. Skipped")
continue
article, is_created = Article.objects.get_or_create(
board_id=item["board_id"],
feed_id=item["id"],
@@ -175,6 +184,18 @@ def refresh_feed(item):
)
def check_conditions(conditions, entry):
    """Return True when a feed entry satisfies every configured condition.

    Args:
        conditions: A list of condition mappings taken from the feed
            configuration, or a falsy value (``None`` / empty list) when the
            feed has no conditions. A condition of type ``"in"`` carries a
            ``"field"`` key (the name of the entry field to inspect) and an
            ``"in"`` key (the value that must be contained in that field).
        entry: A mapping-like feed entry, indexed by field name.

    Returns:
        ``True`` if there are no conditions or all of them match,
        ``False`` as soon as one condition does not match.

    Raises:
        KeyError: If a condition itself is malformed (no ``"type"``, or an
            ``"in"`` condition without ``"field"`` / ``"in"``). This is a
            configuration error and is deliberately not swallowed.
    """
    if not conditions:
        return True

    for condition in conditions:
        # Only the "in" condition type is implemented; any other type is
        # ignored and therefore never rejects an entry.
        if condition["type"] != "in":
            continue

        field_name = condition["field"]
        expected = condition["in"]

        # An entry that lacks the field cannot satisfy the condition. Treat it
        # as "no match" instead of letting KeyError propagate to the caller.
        try:
            actual = entry[field_name]
        except KeyError:
            return False

        # The field may be None or another non-container value, in which case
        # the containment test itself raises TypeError. That is also "no match".
        try:
            matched = expected in actual
        except TypeError:
            matched = False

        if not matched:
            return False

    return True
def resolve_url(entry):
url = entry.link
content_type = None