Rework Stator to use a next field and no async

This commit is contained in:
Andrew Godwin 2023-07-07 15:14:06 -06:00
parent e34e4c0c77
commit 7f483af8d3
28 changed files with 1004 additions and 461 deletions

View File

@ -210,8 +210,8 @@ class TimelineEventAdmin(admin.ModelAdmin):
@admin.register(FanOut)
class FanOutAdmin(admin.ModelAdmin):
list_display = ["id", "state", "created", "state_attempted", "type", "identity"]
list_filter = (IdentityLocalFilter, "type", "state", "state_attempted")
list_display = ["id", "state", "created", "state_next_attempt", "type", "identity"]
list_filter = (IdentityLocalFilter, "type", "state")
raw_id_fields = ["subject_post", "subject_post_interaction"]
autocomplete_fields = ["identity"]
readonly_fields = ["created", "updated", "state_changed"]
@ -229,7 +229,7 @@ class FanOutAdmin(admin.ModelAdmin):
@admin.register(PostInteraction)
class PostInteractionAdmin(admin.ModelAdmin):
list_display = ["id", "state", "state_attempted", "type", "identity", "post"]
list_display = ["id", "state", "state_next_attempt", "type", "identity", "post"]
list_filter = (IdentityLocalFilter, "type", "state")
raw_id_fields = ["post"]
autocomplete_fields = ["identity"]

View File

@ -0,0 +1,234 @@
# Generated by Django 4.2.1 on 2023-07-05 22:18
from django.db import migrations, models
class Migration(migrations.Migration):
    """
    Rework the Stator scheduling columns on all activities models.

    Part of the "next attempt" Stator redesign:

    * Drops the old per-model Stator indexes (the auto-named
      ``activities__state_r_*`` indexes plus the explicit
      ``ix_*_state_attempted`` / ``ix_*_state_locked`` ones).
    * Drops the ``state_attempted`` and ``state_ready`` columns.
    * Adds a nullable ``state_next_attempt`` timestamp (null means
      "attempt immediately").
    * Re-declares ``state_locked_until`` with ``db_index=True``.
    * Adds one combined ``(state, state_next_attempt,
      state_locked_until)`` index per model to serve the new
      scheduling query.

    NOTE(review): these index changes touch every main table; applying
    them on a live site may trigger the PostgreSQL deadlock detector —
    the release notes recommend stopping web/Stator processes first.
    """

    dependencies = [
        ("activities", "0016_index_together_migration"),
    ]

    operations = [
        # --- Drop the old Stator indexes from every model ---
        migrations.RemoveIndex(
            model_name="emoji",
            name="activities__state_r_aa72ec_idx",
        ),
        migrations.RemoveIndex(
            model_name="emoji",
            name="ix_emoji_state_attempted",
        ),
        migrations.RemoveIndex(
            model_name="emoji",
            name="ix_emoji_state_locked",
        ),
        migrations.RemoveIndex(
            model_name="fanout",
            name="ix_fanout_state_attempted",
        ),
        migrations.RemoveIndex(
            model_name="fanout",
            name="ix_fanout_state_locked",
        ),
        migrations.RemoveIndex(
            model_name="fanout",
            name="activities__state_r_aae3b4_idx",
        ),
        migrations.RemoveIndex(
            model_name="hashtag",
            name="ix_hashtag_state_attempted",
        ),
        migrations.RemoveIndex(
            model_name="hashtag",
            name="ix_hashtag_state_locked",
        ),
        migrations.RemoveIndex(
            model_name="hashtag",
            name="activities__state_r_5703be_idx",
        ),
        migrations.RemoveIndex(
            model_name="post",
            name="ix_post_state_attempted",
        ),
        migrations.RemoveIndex(
            model_name="post",
            name="ix_post_state_locked",
        ),
        migrations.RemoveIndex(
            model_name="post",
            name="activities__state_r_b8f1ff_idx",
        ),
        migrations.RemoveIndex(
            model_name="postattachment",
            name="ix_postattachm_state_attempted",
        ),
        migrations.RemoveIndex(
            model_name="postattachment",
            name="ix_postattachm_state_locked",
        ),
        migrations.RemoveIndex(
            model_name="postattachment",
            name="activities__state_r_4e981c_idx",
        ),
        migrations.RemoveIndex(
            model_name="postinteraction",
            name="activities__state_r_981d8c_idx",
        ),
        migrations.RemoveIndex(
            model_name="postinteraction",
            name="ix_postinterac_state_attempted",
        ),
        migrations.RemoveIndex(
            model_name="postinteraction",
            name="ix_postinterac_state_locked",
        ),
        # --- Drop the replaced scheduling columns ---
        migrations.RemoveField(
            model_name="emoji",
            name="state_attempted",
        ),
        migrations.RemoveField(
            model_name="emoji",
            name="state_ready",
        ),
        migrations.RemoveField(
            model_name="fanout",
            name="state_attempted",
        ),
        migrations.RemoveField(
            model_name="fanout",
            name="state_ready",
        ),
        migrations.RemoveField(
            model_name="hashtag",
            name="state_attempted",
        ),
        migrations.RemoveField(
            model_name="hashtag",
            name="state_ready",
        ),
        migrations.RemoveField(
            model_name="post",
            name="state_attempted",
        ),
        migrations.RemoveField(
            model_name="post",
            name="state_ready",
        ),
        migrations.RemoveField(
            model_name="postattachment",
            name="state_attempted",
        ),
        migrations.RemoveField(
            model_name="postattachment",
            name="state_ready",
        ),
        migrations.RemoveField(
            model_name="postinteraction",
            name="state_attempted",
        ),
        migrations.RemoveField(
            model_name="postinteraction",
            name="state_ready",
        ),
        # --- Add the new "next attempt" timestamp (null = run now) ---
        migrations.AddField(
            model_name="emoji",
            name="state_next_attempt",
            field=models.DateTimeField(blank=True, null=True),
        ),
        migrations.AddField(
            model_name="fanout",
            name="state_next_attempt",
            field=models.DateTimeField(blank=True, null=True),
        ),
        migrations.AddField(
            model_name="hashtag",
            name="state_next_attempt",
            field=models.DateTimeField(blank=True, null=True),
        ),
        migrations.AddField(
            model_name="post",
            name="state_next_attempt",
            field=models.DateTimeField(blank=True, null=True),
        ),
        migrations.AddField(
            model_name="postattachment",
            name="state_next_attempt",
            field=models.DateTimeField(blank=True, null=True),
        ),
        migrations.AddField(
            model_name="postinteraction",
            name="state_next_attempt",
            field=models.DateTimeField(blank=True, null=True),
        ),
        # --- Give state_locked_until its own index for lock cleanup ---
        migrations.AlterField(
            model_name="emoji",
            name="state_locked_until",
            field=models.DateTimeField(blank=True, db_index=True, null=True),
        ),
        migrations.AlterField(
            model_name="fanout",
            name="state_locked_until",
            field=models.DateTimeField(blank=True, db_index=True, null=True),
        ),
        migrations.AlterField(
            model_name="hashtag",
            name="state_locked_until",
            field=models.DateTimeField(blank=True, db_index=True, null=True),
        ),
        migrations.AlterField(
            model_name="post",
            name="state_locked_until",
            field=models.DateTimeField(blank=True, db_index=True, null=True),
        ),
        migrations.AlterField(
            model_name="postattachment",
            name="state_locked_until",
            field=models.DateTimeField(blank=True, db_index=True, null=True),
        ),
        migrations.AlterField(
            model_name="postinteraction",
            name="state_locked_until",
            field=models.DateTimeField(blank=True, db_index=True, null=True),
        ),
        # --- Add the combined index the new scheduling query uses ---
        migrations.AddIndex(
            model_name="emoji",
            index=models.Index(
                fields=["state", "state_next_attempt", "state_locked_until"],
                name="ix_emoji_state_next",
            ),
        ),
        migrations.AddIndex(
            model_name="fanout",
            index=models.Index(
                fields=["state", "state_next_attempt", "state_locked_until"],
                name="ix_fanout_state_next",
            ),
        ),
        migrations.AddIndex(
            model_name="hashtag",
            index=models.Index(
                fields=["state", "state_next_attempt", "state_locked_until"],
                name="ix_hashtag_state_next",
            ),
        ),
        migrations.AddIndex(
            model_name="post",
            index=models.Index(
                fields=["state", "state_next_attempt", "state_locked_until"],
                name="ix_post_state_next",
            ),
        ),
        migrations.AddIndex(
            model_name="postattachment",
            index=models.Index(
                fields=["state", "state_next_attempt", "state_locked_until"],
                name="ix_postattachm_state_next",
            ),
        ),
        migrations.AddIndex(
            model_name="postinteraction",
            index=models.Index(
                fields=["state", "state_next_attempt", "state_locked_until"],
                name="ix_postinterac_state_next",
            ),
        ),
    ]

View File

@ -127,7 +127,7 @@ class Emoji(StatorModel):
class Meta:
unique_together = ("domain", "shortcode")
indexes = StatorModel.Meta.indexes
indexes: list = [] # We need this so Stator can add its own
class urls(urlman.Urls):
admin = "/admin/emoji/"
@ -314,11 +314,11 @@ class Emoji(StatorModel):
emoji.remote_url = icon["url"]
emoji.mimetype = mimetype
emoji.category = category
emoji.transition_set_state("outdated")
if emoji.file:
emoji.file.delete(save=True)
else:
emoji.save()
emoji.transition_perform("outdated")
return emoji
emoji = cls.objects.create(

View File

@ -1,5 +1,5 @@
import httpx
from asgiref.sync import sync_to_async
from asgiref.sync import async_to_sync
from django.db import models
from activities.models.timeline_event import TimelineEvent
@ -19,26 +19,24 @@ class FanOutStates(StateGraph):
new.times_out_to(failed, seconds=86400 * 3)
@classmethod
async def handle_new(cls, instance: "FanOut"):
def handle_new(cls, instance: "FanOut"):
"""
Sends the fan-out to the right inbox.
"""
fan_out = await instance.afetch_full()
# Don't try to fan out to identities that are not fetched yet
if not (fan_out.identity.local or fan_out.identity.inbox_uri):
if not (instance.identity.local or instance.identity.inbox_uri):
return
match (fan_out.type, fan_out.identity.local):
match (instance.type, instance.identity.local):
# Handle creating/updating local posts
case ((FanOut.Types.post | FanOut.Types.post_edited), True):
post = await fan_out.subject_post.afetch_full()
post = instance.subject_post
# If the author of the post is blocked or muted, skip out
if (
await Block.objects.active()
.filter(source=fan_out.identity, target=post.author)
.aexists()
Block.objects.active()
.filter(source=instance.identity, target=post.author)
.exists()
):
return cls.skipped
# Make a timeline event directly
@ -48,42 +46,42 @@ class FanOutStates(StateGraph):
add = True
mentioned = {identity.id for identity in post.mentions.all()}
if post.in_reply_to:
followed = await sync_to_async(set)(
fan_out.identity.outbound_follows.filter(
followed = set(
instance.identity.outbound_follows.filter(
state__in=FollowStates.group_active()
).values_list("target_id", flat=True)
)
interested_in = followed.union(
{post.author_id, fan_out.identity_id}
{post.author_id, instance.identity_id}
)
add = (post.author_id in followed) and (
bool(mentioned.intersection(interested_in))
)
if add:
await sync_to_async(TimelineEvent.add_post)(
identity=fan_out.identity,
TimelineEvent.add_post(
identity=instance.identity,
post=post,
)
# We might have been mentioned
if (
fan_out.identity.id in mentioned
and fan_out.identity_id != post.author_id
instance.identity.id in mentioned
and instance.identity_id != post.author_id
):
await sync_to_async(TimelineEvent.add_mentioned)(
identity=fan_out.identity,
TimelineEvent.add_mentioned(
identity=instance.identity,
post=post,
)
# Handle sending remote posts create
case (FanOut.Types.post, False):
post = await fan_out.subject_post.afetch_full()
post = instance.subject_post
# Sign it and send it
try:
await post.author.signed_request(
async_to_sync(post.author.signed_request)(
method="post",
uri=(
fan_out.identity.shared_inbox_uri
or fan_out.identity.inbox_uri
instance.identity.shared_inbox_uri
or instance.identity.inbox_uri
),
body=canonicalise(post.to_create_ap()),
)
@ -92,14 +90,14 @@ class FanOutStates(StateGraph):
# Handle sending remote posts update
case (FanOut.Types.post_edited, False):
post = await fan_out.subject_post.afetch_full()
post = instance.subject_post
# Sign it and send it
try:
await post.author.signed_request(
async_to_sync(post.author.signed_request)(
method="post",
uri=(
fan_out.identity.shared_inbox_uri
or fan_out.identity.inbox_uri
instance.identity.shared_inbox_uri
or instance.identity.inbox_uri
),
body=canonicalise(post.to_update_ap()),
)
@ -108,24 +106,24 @@ class FanOutStates(StateGraph):
# Handle deleting local posts
case (FanOut.Types.post_deleted, True):
post = await fan_out.subject_post.afetch_full()
if fan_out.identity.local:
post = instance.subject_post
if instance.identity.local:
# Remove all timeline events mentioning it
await TimelineEvent.objects.filter(
identity=fan_out.identity,
TimelineEvent.objects.filter(
identity=instance.identity,
subject_post=post,
).adelete()
).delete()
# Handle sending remote post deletes
case (FanOut.Types.post_deleted, False):
post = await fan_out.subject_post.afetch_full()
post = instance.subject_post
# Send it to the remote inbox
try:
await post.author.signed_request(
async_to_sync(post.author.signed_request)(
method="post",
uri=(
fan_out.identity.shared_inbox_uri
or fan_out.identity.inbox_uri
instance.identity.shared_inbox_uri
or instance.identity.inbox_uri
),
body=canonicalise(post.to_delete_ap()),
)
@ -134,38 +132,38 @@ class FanOutStates(StateGraph):
# Handle local boosts/likes
case (FanOut.Types.interaction, True):
interaction = await fan_out.subject_post_interaction.afetch_full()
interaction = instance.subject_post_interaction
# If the author of the interaction is blocked or their notifications
# are muted, skip out
if (
await Block.objects.active()
Block.objects.active()
.filter(
models.Q(mute=False) | models.Q(include_notifications=True),
source=fan_out.identity,
source=instance.identity,
target=interaction.identity,
)
.aexists()
.exists()
):
return cls.skipped
# If blocked/muted the underlying post author, skip out
if (
await Block.objects.active()
Block.objects.active()
.filter(
source=fan_out.identity,
source=instance.identity,
target_id=interaction.post.author_id,
)
.aexists()
.exists()
):
return cls.skipped
# Make a timeline event directly
await sync_to_async(TimelineEvent.add_post_interaction)(
identity=fan_out.identity,
TimelineEvent.add_post_interaction(
identity=instance.identity,
interaction=interaction,
)
# Handle sending remote boosts/likes/votes/pins
case (FanOut.Types.interaction, False):
interaction = await fan_out.subject_post_interaction.afetch_full()
interaction = instance.subject_post_interaction
# Send it to the remote inbox
try:
if interaction.type == interaction.Types.vote:
@ -174,11 +172,11 @@ class FanOutStates(StateGraph):
body = interaction.to_add_ap()
else:
body = interaction.to_create_ap()
await interaction.identity.signed_request(
async_to_sync(interaction.identity.signed_request)(
method="post",
uri=(
fan_out.identity.shared_inbox_uri
or fan_out.identity.inbox_uri
instance.identity.shared_inbox_uri
or instance.identity.inbox_uri
),
body=canonicalise(body),
)
@ -187,28 +185,28 @@ class FanOutStates(StateGraph):
# Handle undoing local boosts/likes
case (FanOut.Types.undo_interaction, True): # noqa:F841
interaction = await fan_out.subject_post_interaction.afetch_full()
interaction = instance.subject_post_interaction
# Delete any local timeline events
await sync_to_async(TimelineEvent.delete_post_interaction)(
identity=fan_out.identity,
TimelineEvent.delete_post_interaction(
identity=instance.identity,
interaction=interaction,
)
# Handle sending remote undoing boosts/likes/pins
case (FanOut.Types.undo_interaction, False): # noqa:F841
interaction = await fan_out.subject_post_interaction.afetch_full()
interaction = instance.subject_post_interaction
# Send an undo to the remote inbox
try:
if interaction.type == interaction.Types.pin:
body = interaction.to_remove_ap()
else:
body = interaction.to_undo_ap()
await interaction.identity.signed_request(
async_to_sync(interaction.identity.signed_request)(
method="post",
uri=(
fan_out.identity.shared_inbox_uri
or fan_out.identity.inbox_uri
instance.identity.shared_inbox_uri
or instance.identity.inbox_uri
),
body=canonicalise(body),
)
@ -217,32 +215,30 @@ class FanOutStates(StateGraph):
# Handle sending identity edited to remote
case (FanOut.Types.identity_edited, False):
identity = await fan_out.subject_identity.afetch_full()
identity = instance.subject_identity
try:
await identity.signed_request(
async_to_sync(identity.signed_request)(
method="post",
uri=(
fan_out.identity.shared_inbox_uri
or fan_out.identity.inbox_uri
),
body=canonicalise(
await sync_to_async(fan_out.subject_identity.to_update_ap)()
instance.identity.shared_inbox_uri
or instance.identity.inbox_uri
),
body=canonicalise(instance.subject_identity.to_update_ap()),
)
except httpx.RequestError:
return
# Handle sending identity deleted to remote
case (FanOut.Types.identity_deleted, False):
identity = await fan_out.subject_identity.afetch_full()
identity = instance.subject_identity
try:
await identity.signed_request(
async_to_sync(identity.signed_request)(
method="post",
uri=(
fan_out.identity.shared_inbox_uri
or fan_out.identity.inbox_uri
instance.identity.shared_inbox_uri
or instance.identity.inbox_uri
),
body=canonicalise(fan_out.subject_identity.to_delete_ap()),
body=canonicalise(instance.subject_identity.to_delete_ap()),
)
except httpx.RequestError:
return
@ -255,14 +251,14 @@ class FanOutStates(StateGraph):
# Created identities make a timeline event
case (FanOut.Types.identity_created, True):
await sync_to_async(TimelineEvent.add_identity_created)(
identity=fan_out.identity,
new_identity=fan_out.subject_identity,
TimelineEvent.add_identity_created(
identity=instance.identity,
new_identity=instance.subject_identity,
)
case _:
raise ValueError(
f"Cannot fan out with type {fan_out.type} local={fan_out.identity.local}"
f"Cannot fan out with type {instance.type} local={instance.identity.local}"
)
return cls.sent

View File

@ -22,12 +22,8 @@ class HashtagStates(StateGraph):
"""
Computes the stats and other things for a Hashtag
"""
from time import time
from .post import Post
start = time()
posts_query = Post.objects.local_public().tagged_with(instance)
total = await posts_query.acount()
@ -57,7 +53,6 @@ class HashtagStates(StateGraph):
instance.stats_updated = timezone.now()
await sync_to_async(instance.save)()
print(f"Updated hashtag {instance.hashtag} in {time() - start:.5f} seconds")
return cls.updated
@ -86,7 +81,6 @@ class HashtagManager(models.Manager):
class Hashtag(StatorModel):
MAXIMUM_LENGTH = 100
# Normalized hashtag without the '#'

View File

@ -8,7 +8,7 @@ from urllib.parse import urlparse
import httpx
import urlman
from asgiref.sync import async_to_sync, sync_to_async
from asgiref.sync import async_to_sync
from django.contrib.postgres.indexes import GinIndex
from django.contrib.postgres.search import SearchVector
from django.db import models, transaction
@ -63,45 +63,44 @@ class PostStates(StateGraph):
edited_fanned_out.transitions_to(deleted)
@classmethod
async def targets_fan_out(cls, post: "Post", type_: str) -> None:
def targets_fan_out(cls, post: "Post", type_: str) -> None:
# Fan out to each target
for follow in await post.aget_targets():
await FanOut.objects.acreate(
for follow in post.get_targets():
FanOut.objects.create(
identity=follow,
type=type_,
subject_post=post,
)
@classmethod
async def handle_new(cls, instance: "Post"):
def handle_new(cls, instance: "Post"):
"""
Creates all needed fan-out objects for a new Post.
"""
post = await instance.afetch_full()
# Only fan out if the post was published in the last day or it's local
# (we don't want to fan out anything older that that which is remote)
if post.local or (timezone.now() - post.published) < datetime.timedelta(days=1):
await cls.targets_fan_out(post, FanOut.Types.post)
await post.ensure_hashtags()
if instance.local or (timezone.now() - instance.published) < datetime.timedelta(
days=1
):
cls.targets_fan_out(instance, FanOut.Types.post)
instance.ensure_hashtags()
return cls.fanned_out
@classmethod
async def handle_deleted(cls, instance: "Post"):
def handle_deleted(cls, instance: "Post"):
"""
Creates all needed fan-out objects needed to delete a Post.
"""
post = await instance.afetch_full()
await cls.targets_fan_out(post, FanOut.Types.post_deleted)
cls.targets_fan_out(instance, FanOut.Types.post_deleted)
return cls.deleted_fanned_out
@classmethod
async def handle_edited(cls, instance: "Post"):
def handle_edited(cls, instance: "Post"):
"""
Creates all needed fan-out objects for an edited Post.
"""
post = await instance.afetch_full()
await cls.targets_fan_out(post, FanOut.Types.post_edited)
await post.ensure_hashtags()
cls.targets_fan_out(instance, FanOut.Types.post_edited)
instance.ensure_hashtags()
return cls.edited_fanned_out
@ -324,7 +323,7 @@ class Post(StatorModel):
fields=["visibility", "local", "created"],
name="ix_post_local_public_created",
),
] + StatorModel.Meta.indexes
]
class urls(urlman.Urls):
view = "{self.author.urls.view}posts/{self.id}/"
@ -375,8 +374,6 @@ class Post(StatorModel):
.first()
)
ain_reply_to_post = sync_to_async(in_reply_to_post)
### Content cleanup and extraction ###
def clean_type_data(self, value):
PostTypeData.parse_obj(value)
@ -552,6 +549,8 @@ class Post(StatorModel):
attachment.name = attrs.description
attachment.save()
self.transition_perform(PostStates.edited)
@classmethod
def mentions_from_content(cls, content, author) -> set[Identity]:
mention_hits = FediverseHtmlParser(content, find_mentions=True).mentions
@ -572,7 +571,7 @@ class Post(StatorModel):
mentions.add(identity)
return mentions
async def ensure_hashtags(self) -> None:
def ensure_hashtags(self) -> None:
"""
Ensure any of the already parsed hashtags from this Post
have a corresponding Hashtag record.
@ -580,10 +579,10 @@ class Post(StatorModel):
# Ensure hashtags
if self.hashtags:
for hashtag in self.hashtags:
tag, _ = await Hashtag.objects.aget_or_create(
tag, _ = Hashtag.objects.get_or_create(
hashtag=hashtag[: Hashtag.MAXIMUM_LENGTH],
)
await tag.atransition_perform(HashtagStates.outdated)
tag.transition_perform(HashtagStates.outdated)
def calculate_stats(self, save=True):
"""
@ -739,33 +738,33 @@ class Post(StatorModel):
"object": object,
}
async def aget_targets(self) -> Iterable[Identity]:
def get_targets(self) -> Iterable[Identity]:
"""
Returns a list of Identities that need to see posts and their changes
"""
targets = set()
async for mention in self.mentions.all():
for mention in self.mentions.all():
targets.add(mention)
# Then, if it's not mentions only, also deliver to followers and all hashtag followers
if self.visibility != Post.Visibilities.mentioned:
async for follower in self.author.inbound_follows.filter(
for follower in self.author.inbound_follows.filter(
state__in=FollowStates.group_active()
).select_related("source"):
targets.add(follower.source)
if self.hashtags:
async for follow in HashtagFollow.objects.by_hashtags(
for follow in HashtagFollow.objects.by_hashtags(
self.hashtags
).prefetch_related("identity"):
targets.add(follow.identity)
# If it's a reply, always include the original author if we know them
reply_post = await self.ain_reply_to_post()
reply_post = self.in_reply_to_post()
if reply_post:
targets.add(reply_post.author)
# And if it's a reply to one of our own, we have to re-fan-out to
# the original author's followers
if reply_post.author.local:
async for follower in reply_post.author.inbound_follows.filter(
for follower in reply_post.author.inbound_follows.filter(
state__in=FollowStates.group_active()
).select_related("source"):
targets.add(follower.source)
@ -782,7 +781,7 @@ class Post(StatorModel):
.filter(mute=False)
.select_related("target")
)
async for block in blocks:
for block in blocks:
try:
targets.remove(block.target)
except KeyError:

View File

@ -179,9 +179,7 @@ class PostInteraction(StatorModel):
updated = models.DateTimeField(auto_now=True)
class Meta:
indexes = [
models.Index(fields=["type", "identity", "post"])
] + StatorModel.Meta.indexes
indexes = [models.Index(fields=["type", "identity", "post"])]
### Display helpers ###

View File

@ -1,4 +1,5 @@
from django.conf import settings
from django.core.cache import cache
from hatchway import api_view
from activities.models import Post
@ -10,6 +11,15 @@ from users.models import Domain, Identity
@api_view.get
def instance_info_v1(request):
# The stats are expensive to calculate, so don't do it very often
stats = cache.get("instance_info_stats")
if stats is None:
stats = {
"user_count": Identity.objects.filter(local=True).count(),
"status_count": Post.objects.filter(local=True).not_hidden().count(),
"domain_count": Domain.objects.count(),
}
cache.set("instance_info_stats", stats, timeout=300)
return {
"uri": request.headers.get("host", settings.SETUP.MAIN_DOMAIN),
"title": Config.system.site_name,
@ -18,11 +28,7 @@ def instance_info_v1(request):
"email": "",
"version": f"takahe/{__version__}",
"urls": {},
"stats": {
"user_count": Identity.objects.filter(local=True).count(),
"status_count": Post.objects.filter(local=True).not_hidden().count(),
"domain_count": Domain.objects.count(),
},
"stats": stats,
"thumbnail": Config.system.site_banner,
"languages": ["en"],
"registrations": (Config.system.signup_allowed),

View File

@ -1,6 +1,5 @@
import traceback
from asgiref.sync import sync_to_async
from django.conf import settings
@ -40,6 +39,3 @@ def capture_exception(exception: BaseException, scope=None, **scope_args):
capture_exception(exception, scope, **scope_args)
elif settings.DEBUG:
traceback.print_exc()
acapture_exception = sync_to_async(capture_exception, thread_sensitive=False)

40
docs/releases/0.10.rst Normal file
View File

@ -0,0 +1,40 @@
0.10
====
*Released: Not Yet Released*
This release is a polish release that is prepping us for the road to 1.0.
This release's major changes:
* Stator, the background task system, has been significantly reworked to require
smaller indexes, spend less time scheduling, and has had most of its async
nature removed, as this both reduces deadlocks and improves performance in
most situations (the context switching was costing more than the gains from
talking to other servers asynchronously).
* TBC
If you'd like to help with code, design, or other areas, see
:doc:`/contributing` to see how to get in touch.
You can download images from `Docker Hub <https://hub.docker.com/r/jointakahe/takahe>`_,
or use the image name ``jointakahe/takahe:0.10``.
Upgrade Notes
-------------
Migrations
~~~~~~~~~~
There are new database migrations; they are backwards-compatible, but contain
very significant index changes to all of the main tables that may cause the
PostgreSQL deadlock detector to trigger if you attempt to apply them while your
site is live.
We recommend:
* Temporarily stopping all instances of the webserver and Stator
* Applying the migration (should be less than a few minutes on most installs)
* Restarting the instances of webserver and Stator

View File

@ -12,6 +12,10 @@ addopts = --tb=short --ds=takahe.settings --import-mode=importlib
filterwarnings =
ignore:There is no current event loop
ignore:No directory at
ignore:DateTimeField Post.created
ignore:'index_together' is deprecated
ignore:Deprecated call to
ignore:pkg_resources is deprecated as an API
[mypy]
warn_unused_ignores = True

View File

@ -13,6 +13,7 @@ class StateGraph:
initial_state: ClassVar["State"]
terminal_states: ClassVar[set["State"]]
automatic_states: ClassVar[set["State"]]
deletion_states: ClassVar[set["State"]]
def __init_subclass__(cls) -> None:
# Collect state members
@ -33,6 +34,7 @@ class StateGraph:
# Check the graph layout
terminal_states = set()
automatic_states = set()
deletion_states = set()
initial_state = None
for state in cls.states.values():
# Check for multiple initial states
@ -42,6 +44,9 @@ class StateGraph:
f"The graph has more than one initial state: {initial_state} and {state}"
)
initial_state = state
# Collect states that require deletion handling (they can be terminal or not)
if state.delete_after:
deletion_states.add(state)
# Collect terminal states
if state.terminal:
state.externally_progressed = True
@ -74,6 +79,7 @@ class StateGraph:
cls.initial_state = initial_state
cls.terminal_states = terminal_states
cls.automatic_states = automatic_states
cls.deletion_states = deletion_states
# Generate choices
cls.choices = [(name, name) for name in cls.states.keys()]
@ -98,6 +104,9 @@ class State:
self.attempt_immediately = attempt_immediately
self.force_initial = force_initial
self.delete_after = delete_after
# Deletes are also only attempted on try_intervals
if self.delete_after and not self.try_interval:
self.try_interval = self.delete_after
self.parents: set["State"] = set()
self.children: set["State"] = set()
self.timeout_state: State | None = None

View File

@ -1,6 +1,5 @@
from typing import cast
from asgiref.sync import async_to_sync
from django.apps import apps
from django.core.management.base import BaseCommand
@ -84,6 +83,6 @@ class Command(BaseCommand):
run_for=run_for,
)
try:
async_to_sync(runner.run)()
runner.run()
except KeyboardInterrupt:
print("Ctrl-C received")

View File

@ -1,8 +1,8 @@
import datetime
import traceback
from typing import ClassVar, cast
from typing import ClassVar
from asgiref.sync import sync_to_async
from asgiref.sync import async_to_sync, iscoroutinefunction
from django.db import models, transaction
from django.db.models.signals import class_prepared
from django.utils import timezone
@ -47,19 +47,15 @@ def add_stator_indexes(sender, **kwargs):
if issubclass(sender, StatorModel):
indexes = [
models.Index(
fields=["state", "state_attempted"],
name=f"ix_{sender.__name__.lower()[:11]}_state_attempted",
),
models.Index(
fields=["state_locked_until", "state"],
condition=models.Q(state_locked_until__isnull=False),
name=f"ix_{sender.__name__.lower()[:11]}_state_locked",
fields=["state", "state_next_attempt", "state_locked_until"],
name=f"ix_{sender.__name__.lower()[:11]}_state_next",
),
]
if not sender._meta.indexes:
# Meta.indexes needs to not be None to trigger Django behaviors
sender.Meta.indexes = []
sender._meta.indexes = []
for idx in indexes:
sender._meta.indexes.append(idx)
@ -81,30 +77,26 @@ class StatorModel(models.Model):
concrete model yourself.
"""
SCHEDULE_BATCH_SIZE = 1000
CLEAN_BATCH_SIZE = 1000
DELETE_BATCH_SIZE = 500
state: StateField
# If this row is up for transition attempts (which it always is on creation!)
state_ready = models.BooleanField(default=True)
# When the state last actually changed, or the date of instance creation
state_changed = models.DateTimeField(auto_now_add=True)
# When the last state change for the current state was attempted
# (and not successful, as this is cleared on transition)
state_attempted = models.DateTimeField(blank=True, null=True)
# When the next state change should be attempted (null means immediately)
state_next_attempt = models.DateTimeField(blank=True, null=True)
# If a lock is out on this row, when it is locked until
# (we don't identify the lock owner, as there's no heartbeats)
state_locked_until = models.DateTimeField(null=True, blank=True)
state_locked_until = models.DateTimeField(null=True, blank=True, db_index=True)
# Collection of subclasses of us
subclasses: ClassVar[list[type["StatorModel"]]] = []
class Meta:
abstract = True
indexes = [models.Index(fields=["state_ready", "state_locked_until", "state"])]
def __init_subclass__(cls) -> None:
if cls is not StatorModel:
@ -118,52 +110,6 @@ class StatorModel(models.Model):
def state_age(self) -> float:
return (timezone.now() - self.state_changed).total_seconds()
@classmethod
async def atransition_schedule_due(cls, now=None):
"""
Finds instances of this model that need to run and schedule them.
"""
if now is None:
now = timezone.now()
q = models.Q()
for state in cls.state_graph.states.values():
state = cast(State, state)
if not state.externally_progressed:
q = q | models.Q(
(
models.Q(
state_attempted__lte=(
now
- datetime.timedelta(
seconds=cast(float, state.try_interval)
)
)
)
| models.Q(state_attempted__isnull=True)
),
state=state.name,
)
select_query = cls.objects.filter(q)[: cls.SCHEDULE_BATCH_SIZE]
await cls.objects.filter(pk__in=select_query).aupdate(state_ready=True)
@classmethod
async def atransition_delete_due(cls, now=None):
"""
Finds instances of this model that need to be deleted and deletes them.
"""
if now is None:
now = timezone.now()
for state in cls.state_graph.states.values():
state = cast(State, state)
if state.delete_after:
select_query = cls.objects.filter(
state=state,
state_changed__lte=(
now - datetime.timedelta(seconds=state.delete_after)
),
)[: cls.SCHEDULE_BATCH_SIZE]
await cls.objects.filter(pk__in=select_query).adelete()
@classmethod
def transition_get_with_lock(
cls, number: int, lock_expiry: datetime.datetime
@ -172,11 +118,17 @@ class StatorModel(models.Model):
Returns up to `number` tasks for execution, having locked them.
"""
with transaction.atomic():
# Query for `number` rows that:
# - Have a next_attempt that's either null or in the past
# - Have one of the states we care about
# Then, sort them by next_attempt NULLS FIRST, so that we handle the
# rows in a roughly FIFO order.
selected = list(
cls.objects.filter(
state_locked_until__isnull=True,
state_ready=True,
models.Q(state_next_attempt__isnull=True)
| models.Q(state_next_attempt__lte=timezone.now()),
state__in=cls.state_graph.automatic_states,
state_locked_until__isnull=True,
)[:number].select_for_update()
)
cls.objects.filter(pk__in=[i.pk for i in selected]).update(
@ -185,44 +137,56 @@ class StatorModel(models.Model):
return selected
@classmethod
async def atransition_get_with_lock(
cls, number: int, lock_expiry: datetime.datetime
) -> list["StatorModel"]:
return await sync_to_async(cls.transition_get_with_lock)(number, lock_expiry)
def transition_delete_due(cls) -> int | None:
"""
Finds instances of this model that need to be deleted and deletes them
in small batches. Returns how many were deleted.
"""
if cls.state_graph.deletion_states:
constraints = models.Q()
for state in cls.state_graph.deletion_states:
constraints |= models.Q(
state=state,
state_changed__lte=(
timezone.now() - datetime.timedelta(seconds=state.delete_after)
),
)
select_query = cls.objects.filter(
models.Q(state_next_attempt__isnull=True)
| models.Q(state_next_attempt__lte=timezone.now()),
constraints,
)[: cls.DELETE_BATCH_SIZE]
return cls.objects.filter(pk__in=select_query).delete()[0]
return None
@classmethod
def transition_ready_count(cls) -> int:
    """
    Returns how many instances are "queued": unlocked, in an automatic
    state, and with a next attempt time that is unset or already due.
    """
    return cls.objects.filter(
        models.Q(state_next_attempt__isnull=True)
        | models.Q(state_next_attempt__lte=timezone.now()),
        state_locked_until__isnull=True,
        state__in=cls.state_graph.automatic_states,
    ).count()
@classmethod
def transition_clean_locks(cls):
    """
    Deletes stale locks (in batches, to avoid a giant query)
    """
    # A lock is stale once its expiry time has passed; clear it so the row
    # becomes eligible for scheduling again.
    select_query = cls.objects.filter(state_locked_until__lte=timezone.now())[
        : cls.CLEAN_BATCH_SIZE
    ]
    cls.objects.filter(pk__in=select_query).update(state_locked_until=None)
def transition_schedule(self):
    """
    Adds this instance to the queue to get its state transition attempted.
    The scheduler will call this, but you can also call it directly if you
    know it'll be ready and want to lower latency.
    """
    # Flags the row as ready and persists immediately so the scheduler's
    # next pass will pick it up. Note this saves the whole instance, not
    # just the flag.
    self.state_ready = True
    self.save()
def transition_attempt(self) -> State | None:
    """
    Attempts to transition the current state by running its handler(s).

    Returns the new State if a transition happened (including a timeout
    transition), or None when nothing changed. On "nothing changed" the row
    is unlocked and rescheduled after the state's try_interval.
    """
    current_state: State = self.state_graph.states[self.state]
    # If it's a manual progression state don't even try
    # We shouldn't really be here in this case, but it could be a race condition
    if current_state.externally_progressed:
        print(
            f"Warning: trying to progress externally progressed state {self.state}!"
        )
        return None
    # Try running its handler function; handlers may still be coroutines,
    # so run those to completion synchronously.
    try:
        if iscoroutinefunction(current_state.handler):
            next_state = async_to_sync(current_state.handler)(self)
        else:
            next_state = current_state.handler(self)
    except TryAgainLater:
        # Handler explicitly asked to be retried later; fall through to the
        # timeout check and rescheduling below.
        pass
    except BaseException as e:
        exceptions.capture_exception(e)
        traceback.print_exc()
    else:
        if next_state:
            # Ensure it's a State object, not just a state name
            if isinstance(next_state, str):
                next_state = self.state_graph.states[next_state]
            # Ensure it's a declared transition
            if next_state not in current_state.children:
                raise ValueError(
                    f"Cannot transition from {current_state} to {next_state} - not a declared transition"
                )
            self.transition_perform(next_state)
            return next_state
    # See if it timed out since its last state change
    if (
        current_state.timeout_value
        and current_state.timeout_value
        <= (timezone.now() - self.state_changed).total_seconds()
    ):
        self.transition_perform(current_state.timeout_state)  # type: ignore
        return current_state.timeout_state
    # Nothing happened, set next execution and unlock it
    # (try_interval is assumed non-None for automatic states — TODO confirm)
    self.__class__.objects.filter(pk=self.pk).update(
        state_next_attempt=(
            timezone.now() + datetime.timedelta(seconds=current_state.try_interval)  # type: ignore
        ),
        state_locked_until=None,
    )
    return None
@ -273,27 +246,6 @@ class StatorModel(models.Model):
state,
)
atransition_perform = sync_to_async(transition_perform)
def transition_set_state(self, state: State | str):
    """
    Sets the instance to the given state name for when it is saved.
    """
    # Normalise a State object down to its name.
    if isinstance(state, State):
        state = state.name
    if state not in self.state_graph.states:
        raise ValueError(f"Invalid state {state}")
    target = self.state_graph.states[state]
    self.state = state  # type: ignore
    self.state_changed = timezone.now()
    self.state_locked_until = None
    # States flagged attempt_immediately get queued right away; otherwise
    # pretend an attempt just happened so the scheduler waits.
    if target.attempt_immediately:
        self.state_attempted = None
        self.state_ready = True
    else:
        self.state_attempted = timezone.now()
        self.state_ready = False
@classmethod
def transition_perform_queryset(
    cls,
    queryset: models.QuerySet,
    state: State | str,
):
    """
    Transitions every instance in the queryset to the given state name, forcibly.

    NOTE(review): the parameter list was cut by a diff hunk; (queryset, state)
    matches the body and the docstring — confirm against the full file.
    """
    # Really ensure we have the right state object
    if isinstance(state, State):
        state_obj = cls.state_graph.states[state.name]
    else:
        state_obj = cls.state_graph.states[state]
    # See if it's ready immediately (if not, delay until first try_interval)
    if state_obj.attempt_immediately or state_obj.try_interval is None:
        queryset.update(
            state=state_obj,
            state_changed=timezone.now(),
            state_next_attempt=None,
            state_locked_until=None,
        )
    else:
        queryset.update(
            state=state_obj,
            state_changed=timezone.now(),
            state_next_attempt=(
                timezone.now() + datetime.timedelta(seconds=state_obj.try_interval)
            ),
            state_locked_until=None,
        )
@ -355,10 +308,6 @@ class Stats(models.Model):
instance.statistics[key] = {}
return instance
@classmethod