Delete remote posts after a set horizon time

This commit is contained in:
Andrew Godwin 2023-10-01 10:17:00 -06:00
parent b60e807b91
commit 8cc1691857
2 changed files with 28 additions and 2 deletions

View File

@ -8,6 +8,7 @@ from urllib.parse import urlparse
import httpx import httpx
import urlman import urlman
from django.conf import settings
from django.contrib.postgres.indexes import GinIndex from django.contrib.postgres.indexes import GinIndex
from django.contrib.postgres.search import SearchVector from django.contrib.postgres.search import SearchVector
from django.db import models, transaction from django.db import models, transaction
@ -47,14 +48,15 @@ from users.models.system_actor import SystemActor
class PostStates(StateGraph): class PostStates(StateGraph):
new = State(try_interval=300) new = State(try_interval=300)
fanned_out = State(externally_progressed=True) fanned_out = State(try_interval=86400 * 14)
deleted = State(try_interval=300) deleted = State(try_interval=300)
deleted_fanned_out = State(delete_after=24 * 60 * 60) deleted_fanned_out = State(delete_after=86400)
edited = State(try_interval=300) edited = State(try_interval=300)
edited_fanned_out = State(externally_progressed=True) edited_fanned_out = State(externally_progressed=True)
new.transitions_to(fanned_out) new.transitions_to(fanned_out)
fanned_out.transitions_to(deleted_fanned_out)
fanned_out.transitions_to(deleted) fanned_out.transitions_to(deleted)
fanned_out.transitions_to(edited) fanned_out.transitions_to(edited)
@ -87,6 +89,25 @@ class PostStates(StateGraph):
instance.ensure_hashtags() instance.ensure_hashtags()
return cls.fanned_out return cls.fanned_out
@classmethod
def handle_fanned_out(cls, instance: "Post"):
    """
    Periodic check for fanned-out posts: prunes a post once it is remote,
    older than the REMOTE_PRUNE_HORIZON setting, and has no interactions
    from local identities.

    Returns cls.deleted_fanned_out after deleting the post, and None in
    every other case.
    """
    # Local posts are never candidates for pruning
    if instance.local:
        return None
    # Posts created after the cutoff are still inside the horizon
    horizon = datetime.timedelta(days=settings.SETUP.REMOTE_PRUNE_HORIZON)
    cutoff = timezone.now() - horizon
    if instance.created > cutoff:
        return None
    # Any interaction from a local identity keeps the post around
    local_interactions = instance.interactions.filter(identity__local=True)
    if local_interactions.exists():
        return None
    # Nothing is holding on to it, so remove it
    instance.delete()
    return cls.deleted_fanned_out
@classmethod @classmethod
def handle_deleted(cls, instance: "Post"): def handle_deleted(cls, instance: "Post"):
""" """

View File

@ -143,6 +143,11 @@ class Settings(BaseSettings):
#: Default cache backend #: Default cache backend
CACHES_DEFAULT: CacheBackendUrl | None = None CACHES_DEFAULT: CacheBackendUrl | None = None
# How long to wait, in days, before remote posts/profiles are pruned from
# our database if nobody local has interacted with them. Should be roughly
# a multiple of two weeks, as pruning is only re-checked every 14 days.
REMOTE_PRUNE_HORIZON: int = 60
# Stator tuning # Stator tuning
STATOR_CONCURRENCY: int = 50 STATOR_CONCURRENCY: int = 50
STATOR_CONCURRENCY_PER_MODEL: int = 15 STATOR_CONCURRENCY_PER_MODEL: int = 15