Chunked search reindexing

This commit is contained in:
vas3k 2021-02-14 12:45:11 +01:00
parent 37b1ba69ab
commit a4d16c2710
2 changed files with 39 additions and 10 deletions

View File

@@ -6,6 +6,7 @@ from comments.models import Comment
from posts.models.post import Post
from search.models import SearchIndex
from users.models.user import User
from utils.queryset import chunked_queryset
# Module-level logger, named after this module via __name__.
log = logging.getLogger(__name__)
@@ -16,17 +17,26 @@ class Command(BaseCommand):
def handle(self, *args, **options):
    """Rebuild the search index from scratch, one chunk of rows at a time.

    Deletes every ``SearchIndex`` row, then re-indexes comments, posts and
    users, writing one progress line per object to ``self.stdout``. Each
    source queryset is passed through ``chunked_queryset`` so it is
    consumed as a series of smaller querysets instead of a single
    full-table loop.

    Args:
        *args: Not used by this method.
        **options: Not used by this method.
    """
    # Start from an empty index; everything below is re-created.
    SearchIndex.objects.all().delete()

    # NOTE: chunked_queryset() applies its own order_by("pk"), which replaces
    # the "-created_at" ordering requested below; rows arrive in pk order.
    for chunk in chunked_queryset(
        Comment.visible_objects().filter(is_deleted=False, post__is_visible=True).order_by("-created_at")
    ):
        for comment in chunk:
            self.stdout.write(f"Indexing comment: {comment.id}")
            SearchIndex.update_comment_index(comment)

    for chunk in chunked_queryset(
        Post.visible_objects().filter(is_shadow_banned=False).order_by("-created_at")
    ):
        for post in chunk:
            self.stdout.write(f"Indexing post: {post.slug}")
            SearchIndex.update_post_index(post)

    for chunk in chunked_queryset(
        User.objects.filter(moderation_status=User.MODERATION_STATUS_APPROVED).order_by("-created_at")
    ):
        for user in chunk:
            self.stdout.write(f"Indexing user: {user.slug}")
            # Users get two index passes: the user entry itself and its tags.
            SearchIndex.update_user_index(user)
            SearchIndex.update_user_tags(user)

    self.stdout.write("Done 🥙")

19
utils/queryset.py Normal file
View File

@@ -0,0 +1,19 @@
def chunked_queryset(queryset, chunk_size=1000):
    """Yield ``queryset`` as consecutive chunks ordered by primary key.

    The queryset is re-ordered by ``pk`` (any ordering set by the caller is
    replaced) and split into sub-querysets, each bounded by a primary-key
    range and covering at most ``chunk_size`` rows.

    No lower bound is applied to the first chunk, so primary keys that are
    zero, negative or non-numeric (anything the database can order) are
    handled as well as positive integers.

    Args:
        queryset: Queryset-like object supporting ``order_by``, ``filter``
            with ``pk__gt`` / ``pk__lte`` lookups, and ``values_list``.
        chunk_size: Maximum number of rows per yielded chunk; must be >= 1.

    Yields:
        Sub-querysets of ``queryset``, each ordered by ``pk``.

    Raises:
        ValueError: If ``chunk_size`` is less than 1. Because this is a
            generator, the error surfaces when iteration starts.
    """
    if chunk_size < 1:
        # A non-positive size would turn into a negative index below.
        raise ValueError("chunk_size must be a positive integer")

    ordered = queryset.order_by("pk")
    start_pk = None  # None means "no lower bound yet" (first chunk)

    while True:
        # Rows not yet handed out: everything strictly after the last chunk.
        remaining = ordered if start_pk is None else ordered.filter(pk__gt=start_pk)
        remaining_pks = remaining.values_list("pk", flat=True)

        try:
            # pk of the chunk_size-th remaining row closes a full chunk
            end_pk = remaining_pks[chunk_size - 1]
        except IndexError:
            # fewer than chunk_size rows left: the chunk ends at the last row
            end_pk = remaining_pks.last()
            if end_pk is None:
                # no entries left
                return

        yield remaining.filter(pk__lte=end_pk)

        start_pk = end_pk