From 8cc1691857d2f65c1fe0351f098aa1dba39e22c9 Mon Sep 17 00:00:00 2001
From: Andrew Godwin
Date: Sun, 1 Oct 2023 10:17:00 -0600
Subject: [PATCH] Delete remote posts after a set horizon time

---
 activities/models/post.py | 31 +++++++++++++++++++++++++++++--
 takahe/settings.py        |  5 +++++
 2 files changed, 34 insertions(+), 2 deletions(-)

diff --git a/activities/models/post.py b/activities/models/post.py
index 2cb3be7..342e4b5 100644
--- a/activities/models/post.py
+++ b/activities/models/post.py
@@ -8,6 +8,7 @@ from urllib.parse import urlparse
 
 import httpx
 import urlman
+from django.conf import settings
 from django.contrib.postgres.indexes import GinIndex
 from django.contrib.postgres.search import SearchVector
 from django.db import models, transaction
@@ -47,14 +48,15 @@ from users.models.system_actor import SystemActor
 
 class PostStates(StateGraph):
     new = State(try_interval=300)
-    fanned_out = State(externally_progressed=True)
+    fanned_out = State(try_interval=86400 * 14)
     deleted = State(try_interval=300)
-    deleted_fanned_out = State(delete_after=24 * 60 * 60)
+    deleted_fanned_out = State(delete_after=86400)
 
     edited = State(try_interval=300)
     edited_fanned_out = State(externally_progressed=True)
 
     new.transitions_to(fanned_out)
+    fanned_out.transitions_to(deleted_fanned_out)
     fanned_out.transitions_to(deleted)
     fanned_out.transitions_to(edited)
 
@@ -87,6 +89,31 @@ class PostStates(StateGraph):
         instance.ensure_hashtags()
         return cls.fanned_out
 
+    @classmethod
+    def handle_fanned_out(cls, instance: "Post"):
+        """
+        For remote posts, sees if we can delete them every so often.
+
+        Returns deleted_fanned_out once the post has been deleted; in every
+        other case it returns nothing.
+        """
+        # A horizon of zero (or less) means pruning is switched off
+        if settings.SETUP.REMOTE_PRUNE_HORIZON <= 0:
+            return
+        # To be a candidate for deletion, a post must be remote and old enough
+        if instance.local:
+            return
+        if instance.created > timezone.now() - datetime.timedelta(
+            days=settings.SETUP.REMOTE_PRUNE_HORIZON
+        ):
+            return
+        # It must have no local interactions
+        if instance.interactions.filter(identity__local=True).exists():
+            return
+        # OK, delete it!
+        instance.delete()
+        return cls.deleted_fanned_out
+
     @classmethod
     def handle_deleted(cls, instance: "Post"):
         """
diff --git a/takahe/settings.py b/takahe/settings.py
index bd682cd..a5dd4dd 100644
--- a/takahe/settings.py
+++ b/takahe/settings.py
@@ -143,6 +143,11 @@ class Settings(BaseSettings):
     #: Default cache backend
     CACHES_DEFAULT: CacheBackendUrl | None = None
 
+    # How long to wait, in days, until remote posts/profiles are pruned from
+    # our database if nobody local has interacted with them. Must be in rough
+    # multiples of two weeks. Set to zero to disable.
+    REMOTE_PRUNE_HORIZON: int = 60
+
     # Stator tuning
     STATOR_CONCURRENCY: int = 50
     STATOR_CONCURRENCY_PER_MODEL: int = 15