takahe/activities/management/commands/pruneposts.py

81 lines
2.9 KiB
Python
Raw Normal View History

2023-11-12 15:23:43 -08:00
import datetime
import sys
from django.conf import settings
from django.core.management.base import BaseCommand
from django.db.models import Q
2023-11-12 15:23:43 -08:00
from django.utils import timezone
from activities.models import Post
class Command(BaseCommand):
    """
    Delete one batch of old, remote posts that nothing local refers to.

    A post is selected for deletion when all of the following hold:

    * it is not local and was created more than
      ``settings.SETUP.REMOTE_PRUNE_HORIZON`` days ago;
    * it has no interaction from a local identity and its visibility is
      not ``mentioned``;
    * no local post is a reply to it;
    * it is not itself a reply to a local post.

    The process exits via ``sys.exit`` with status 2 when pruning is
    disabled (``REMOTE_PRUNE_HORIZON`` is falsy) and with status 1 when
    nothing is left to delete after filtering; otherwise ``handle``
    returns normally after deleting.
    """

    help = "Prunes posts that are old, not local and have no local interaction"

    def add_arguments(self, parser):
        """Register the ``--number`` / ``-n`` batch-size option."""
        parser.add_argument(
            "--number",
            "-n",
            type=int,
            default=500,
            help="The maximum number of posts to prune at once",
        )

    def handle(self, number: int, *args, **options):
        """
        Find up to ``number`` prunable posts and delete them.

        Progress is written to ``self.stdout`` (rather than ``print``) so
        that callers using ``call_command(..., stdout=...)`` can capture it.
        """
        if not settings.SETUP.REMOTE_PRUNE_HORIZON:
            self.stdout.write("Pruning has been disabled as REMOTE_PRUNE_HORIZON=0")
            sys.exit(2)

        # Find a set of posts that match the initial criteria
        self.stdout.write(f"Running query to find up to {number} old posts...")
        horizon = timezone.now() - datetime.timedelta(
            days=settings.SETUP.REMOTE_PRUNE_HORIZON
        )
        posts = Post.objects.filter(
            local=False,
            created__lt=horizon,
        ).exclude(
            Q(interactions__identity__local=True)
            | Q(visibility=Post.Visibilities.mentioned)
        )[:number]
        # Maps object_uri -> id; keyed by URI because replies reference
        # their parent through the in_reply_to URI.
        ids_by_uri = dict(posts.values_list("object_uri", "id"))
        self.stdout.write(f"  found {len(ids_by_uri)}")

        # Fetch all of their replies and exclude any that have local replies
        self.stdout.write("Excluding ones with local replies...")
        replied_to_uris = Post.objects.filter(
            local=True,
            in_reply_to__in=ids_by_uri.keys(),
        ).values_list("in_reply_to", flat=True)
        for uri in replied_to_uris:
            # The same parent URI can appear once per local reply, so a
            # repeated removal must be a no-op.
            if uri:
                ids_by_uri.pop(uri, None)
        self.stdout.write(f"  narrowed down to {len(ids_by_uri)}")

        # Fetch all the posts that they are replies to, and don't delete ones
        # that are replies to local posts
        self.stdout.write("Excluding ones that are replies to local posts...")
        in_reply_tos = (
            Post.objects.filter(id__in=ids_by_uri.values())
            .values_list("in_reply_to", flat=True)
            .distinct()
        )
        local_object_uris = Post.objects.filter(
            local=True, object_uri__in=in_reply_tos
        ).values_list("object_uri", flat=True)
        final_post_ids = list(
            Post.objects.filter(id__in=ids_by_uri.values())
            .exclude(in_reply_to__in=local_object_uris)
            .values_list("id", flat=True)
        )
        self.stdout.write(f"  narrowed down to {len(final_post_ids)}")

        # Delete them
        if not final_post_ids:
            # Nothing to prune: signalled to the caller through exit status 1.
            sys.exit(1)
        self.stdout.write("Deleting...")
        # delete() returns (total, {model_label: count}); only the
        # per-model breakdown is reported.
        _, deleted = Post.objects.filter(id__in=final_post_ids).delete()
        self.stdout.write("Deleted:")
        for model, model_deleted in deleted.items():
            self.stdout.write(f"  {model}: {model_deleted}")