Refactor HTML rendering into one place

Also suppress using external <a> tags for now, until we can separate
them from hashtags properly.
This commit is contained in:
Andrew Godwin 2022-12-20 11:39:45 +00:00
parent d750c7a871
commit 4ad4f468a4
18 changed files with 179 additions and 163 deletions

View File

@ -46,6 +46,7 @@ repos:
rev: v0.991
hooks:
- id: mypy
exclude: "^tests/"
additional_dependencies:
[
types-pyopenssl,

View File

@ -1,10 +1,11 @@
import re
from functools import partial
from typing import ClassVar, cast
from typing import ClassVar
import httpx
import urlman
from asgiref.sync import sync_to_async
from cachetools import TTLCache, cached
from django.conf import settings
from django.core.exceptions import ValidationError
from django.db import models
@ -50,17 +51,18 @@ class EmojiStates(StateGraph):
class EmojiQuerySet(models.QuerySet):
def usable(self, domain: Domain | None = None):
if domain is None or domain.local:
visible_q = models.Q(local=True)
else:
visible_q = models.Q(public=True)
"""
Returns all usable emoji, optionally filtering by domain too.
"""
visible_q = models.Q(local=True) | models.Q(public=True)
if Config.system.emoji_unreviewed_are_public:
visible_q |= models.Q(public__isnull=True)
qs = self.filter(visible_q)
if domain:
if not domain.local:
qs = qs.filter(domain=domain)
return qs
@ -136,6 +138,13 @@ class Emoji(StatorModel):
def load_locals(cls) -> dict[str, "Emoji"]:
return {x.shortcode: x for x in Emoji.objects.usable().filter(local=True)}
@classmethod
@cached(cache=TTLCache(maxsize=1000, ttl=60))
def for_domain(cls, domain: Domain | None) -> list["Emoji"]:
if not domain:
return list(cls.locals.values())
return list(cls.objects.usable(domain))
@property
def fullcode(self):
return f":{self.shortcode}:"
@ -164,41 +173,6 @@ class Emoji(StatorModel):
)
return self.fullcode
@classmethod
def imageify_emojis(
cls,
content: str,
*,
emojis: list["Emoji"] | EmojiQuerySet | None = None,
include_local: bool = True,
):
"""
Find :emoji: in content and convert to <img>. If include_local is True,
the local emoji will be used as a fallback for any shortcodes not defined
by emojis.
"""
emoji_set = (
cast(list[Emoji], list(cls.locals.values())) if include_local else []
)
if emojis:
if isinstance(emojis, (EmojiQuerySet, list)):
emoji_set.extend(list(emojis))
else:
raise TypeError("Unsupported type for emojis")
possible_matches = {
emoji.shortcode: emoji.as_html() for emoji in emoji_set if emoji.is_usable
}
def replacer(match):
fullcode = match.group(1).lower()
if fullcode in possible_matches:
return possible_matches[fullcode]
return match.group()
return mark_safe(Emoji.emoji_regex.sub(replacer, content))
@classmethod
def emojis_from_content(cls, content: str, domain: Domain | None) -> list[str]:
"""

View File

@ -5,7 +5,6 @@ import urlman
from asgiref.sync import sync_to_async
from django.db import models
from django.utils import timezone
from django.utils.safestring import mark_safe
from core.html import strip_html
from core.models import Config
@ -176,19 +175,6 @@ class Hashtag(StatorModel):
hashtags = sorted({tag.lower() for tag in hashtag_hits})
return list(hashtags)
@classmethod
def linkify_hashtags(cls, content, domain=None) -> str:
def replacer(match):
hashtag = match.group(1)
if domain:
return f'<a class="hashtag" href="https://{domain.uri_domain}/tags/{hashtag.lower()}/">#{hashtag}</a>'
else:
return (
f'<a class="hashtag" href="/tags/{hashtag.lower()}/">#{hashtag}</a>'
)
return mark_safe(Hashtag.hashtag_regex.sub(replacer, content))
def to_mastodon_json(self):
return {
"name": self.hashtag,

View File

@ -20,8 +20,7 @@ from activities.models.post_types import (
PostTypeDataDecoder,
PostTypeDataEncoder,
)
from activities.templatetags.emoji_tags import imageify_emojis
from core.html import sanitize_post, strip_html
from core.html import ContentRenderer, strip_html
from core.ld import canonicalise, format_ld_date, get_list, parse_ld_date
from stator.exceptions import TryAgainLater
from stator.models import State, StateField, StateGraph, StatorModel
@ -383,13 +382,7 @@ class Post(StatorModel):
return mark_safe(self.mention_regex.sub(replacer, content))
def _safe_content_note(self, *, local: bool = True):
content = Hashtag.linkify_hashtags(
self.linkify_mentions(sanitize_post(self.content), local=local),
domain=None if local else self.author.domain,
)
if local:
content = imageify_emojis(content, self.author.domain)
return content
return ContentRenderer(local=local).render_post(self.content, self)
# def _safe_content_question(self, *, local: bool = True):
# context = {
@ -432,12 +425,6 @@ class Post(StatorModel):
"""
return self.safe_content(local=False)
def safe_content_plain(self):
"""
Returns the content formatted as plain text
"""
return self.linkify_mentions(sanitize_post(self.content))
### Async helpers ###
async def afetch_full(self) -> "Post":
@ -914,7 +901,7 @@ class Post(StatorModel):
"poll": None,
"card": None,
"language": None,
"text": self.safe_content_plain(),
"text": self.safe_content_remote(),
"edited_at": format_ld_date(self.edited) if self.edited else None,
}
if interactions:

View File

@ -3,8 +3,6 @@ import datetime
from django import template
from django.utils import timezone
from activities.models import Hashtag
register = template.Library()
@ -33,14 +31,3 @@ def timedeltashort(value: datetime.datetime):
years = max(days // 365.25, 1)
text = f"{years:0n}y"
return text
@register.filter
def linkify_hashtags(value: str):
"""
Convert hashtags in content in to /tags/<hashtag>/ links.
"""
if not value:
return ""
return Hashtag.linkify_hashtags(value)

View File

@ -1,27 +0,0 @@
from cachetools import TTLCache, cached
from django import template
from activities.models import Emoji
from users.models import Domain
register = template.Library()
@cached(cache=TTLCache(maxsize=1000, ttl=60))
def emoji_from_domain(domain: Domain | None) -> list[Emoji]:
if not domain:
return list(Emoji.locals.values())
return list(Emoji.objects.usable(domain))
@register.filter
def imageify_emojis(value: str, arg: Domain | None = None):
"""
Convert hashtags in content in to /tags/<hashtag>/ links.
"""
if not value:
return ""
emojis = emoji_from_domain(arg)
return Emoji.imageify_emojis(value, emojis=emojis)

View File

@ -40,6 +40,11 @@ class Individual(TemplateView):
ancestors, descendants = PostService(self.post_obj).context(
self.request.identity
)
print(
self.post_obj.to_mastodon_json(),
self.post_obj.emojis.all(),
self.post_obj.emojis.usable(),
)
return {
"identity": self.identity,
"post": self.post_obj,

View File

@ -5,4 +5,4 @@ from api.views.base import api_router
@api_router.get("/v1/custom_emojis", response=list[CustomEmoji])
def emojis(request):
return [e.to_mastodon_json() for e in Emoji.objects.usable()]
return [e.to_mastodon_json() for e in Emoji.objects.usable().filter(local=True)]

View File

@ -15,12 +15,12 @@ def allow_a(tag: str, name: str, value: str):
return False
def sanitize_post(post_html: str) -> str:
def sanitize_html(post_html: str) -> str:
"""
Only allows a, br, p and span tags, and class attributes.
"""
cleaner = bleach.Cleaner(
tags=["br", "p", "a"],
tags=["br", "p"],
attributes={ # type:ignore
"a": allow_a,
"p": ["class"],
@ -50,3 +50,117 @@ def html_to_plaintext(post_html: str) -> str:
# Remove all other HTML and return
cleaner = bleach.Cleaner(tags=[], strip=True, filters=[])
return cleaner.clean(post_html).strip()
class ContentRenderer:
"""
Renders HTML for posts, identity fields, and more.
The `local` parameter affects whether links are absolute (False) or relative (True)
"""
def __init__(self, local: bool):
self.local = local
def render_post(self, html: str, post) -> str:
"""
Given post HTML, normalises it and renders it for presentation.
"""
if not html:
return ""
html = sanitize_html(html)
html = self.linkify_mentions(html, post=post)
html = self.linkify_hashtags(html, identity=post.author)
if self.local:
html = self.imageify_emojis(html, identity=post.author)
return mark_safe(html)
def render_identity(self, html: str, identity, strip: bool = False) -> str:
"""
Given identity field HTML, normalises it and renders it for presentation.
"""
if not html:
return ""
if strip:
html = strip_html(html)
else:
html = sanitize_html(html)
html = self.linkify_hashtags(html, identity=identity)
if self.local:
html = self.imageify_emojis(html, identity=identity)
return mark_safe(html)
def linkify_mentions(self, html: str, post) -> str:
"""
Links mentions _in the context of the post_ - as in, using the mentions
property as the only source (as we might be doing this without other
DB access allowed)
"""
from activities.models import Post
possible_matches = {}
for mention in post.mentions.all():
if self.local:
url = str(mention.urls.view)
else:
url = mention.absolute_profile_uri()
possible_matches[mention.username] = url
possible_matches[f"{mention.username}@{mention.domain_id}"] = url
collapse_name: dict[str, str] = {}
def replacer(match):
precursor = match.group(1)
handle = match.group(2).lower()
if "@" in handle:
short_handle = handle.split("@", 1)[0]
else:
short_handle = handle
if handle in possible_matches:
if short_handle not in collapse_name:
collapse_name[short_handle] = handle
elif collapse_name.get(short_handle) != handle:
short_handle = handle
return f'{precursor}<a href="{possible_matches[handle]}">@{short_handle}</a>'
else:
return match.group()
return Post.mention_regex.sub(replacer, html)
def linkify_hashtags(self, html, identity) -> str:
from activities.models import Hashtag
def replacer(match):
hashtag = match.group(1)
if self.local:
return (
f'<a class="hashtag" href="/tags/{hashtag.lower()}/">#{hashtag}</a>'
)
else:
return f'<a class="hashtag" href="https://{identity.domain.uri_domain}/tags/{hashtag.lower()}/">#{hashtag}</a>'
return Hashtag.hashtag_regex.sub(replacer, html)
def imageify_emojis(self, html: str, identity, include_local: bool = True):
"""
Find :emoji: in content and convert to <img>. If include_local is True,
the local emoji will be used as a fallback for any shortcodes not defined
by emojis.
"""
from activities.models import Emoji
emoji_set = Emoji.for_domain(identity.domain)
if include_local:
emoji_set.extend(Emoji.for_domain(None))
possible_matches = {
emoji.shortcode: emoji.as_html() for emoji in emoji_set if emoji.is_usable
}
def replacer(match):
fullcode = match.group(1).lower()
if fullcode in possible_matches:
return possible_matches[fullcode]
return match.group()
return Emoji.emoji_regex.sub(replacer, html)

View File

@ -15,6 +15,7 @@ filterwarnings =
[mypy]
warn_unused_ignores = True
exclude = tests
[mypy-django.*]
ignore_missing_imports = True

View File

@ -1,5 +1,4 @@
{% extends "base.html" %}
{% load emoji_tags %}
{% block title %}{{ identity }}{% endblock %}

View File

@ -1,4 +1,5 @@
from activities.models import Hashtag
from core.html import ContentRenderer
def test_hashtag_from_content():
@ -19,7 +20,7 @@ def test_hashtag_from_content():
def test_linkify_hashtag():
linkify = Hashtag.linkify_hashtags
linkify = lambda html: ContentRenderer(local=True).linkify_hashtags(html, None)
assert linkify("# hashtag") == "# hashtag"
assert (

View File

@ -9,6 +9,16 @@ def test_fetch_post(httpx_mock: HTTPXMock, config_system):
"""
Tests that a post we don't have locally can be fetched by by_object_uri
"""
httpx_mock.add_response(
url="https://example.com/test-actor",
json={
"@context": [
"https://www.w3.org/ns/activitystreams",
],
"id": "https://example.com/test-actor",
"type": "Person",
},
)
httpx_mock.add_response(
url="https://example.com/test-post",
json={

View File

@ -6,19 +6,19 @@ from core.ld import canonicalise
@pytest.mark.django_db
def test_question_post(config_system, identity, remote_identity):
def test_question_post(config_system, identity, remote_identity, httpx_mock):
data = {
"cc": [],
"id": "https://fosstodon.org/users/manfre/statuses/109519951621804608/activity",
"id": "https://remote.test/test-actor/statuses/109519951621804608/activity",
"to": identity.absolute_profile_uri(),
"type": "Create",
"actor": "https://fosstodon.org/users/manfre",
"actor": "https://remote.test/test-actor/",
"object": {
"cc": [],
"id": "https://fosstodon.org/users/manfre/statuses/109519951621804608",
"id": "https://remote.test/test-actor/statuses/109519951621804608",
"to": identity.absolute_profile_uri(),
"tag": [],
"url": "https://fosstodon.org/@manfre/109519951621804608",
"url": "https://remote.test/test-actor/109519951621804608",
"type": "Question",
"oneOf": [
{
@ -35,13 +35,13 @@ def test_question_post(config_system, identity, remote_identity):
"content": '<p>This is a poll :python: </p><p><span class="h-card"><a href="https://ehakat.manfre.net/@mike/" class="u-url mention">@<span>mike</span></a></span></p>',
"endTime": "2022-12-18T22:03:59Z",
"replies": {
"id": "https://fosstodon.org/users/manfre/statuses/109519951621804608/replies",
"id": "https://remote.test/test-actor/statuses/109519951621804608/replies",
"type": "Collection",
"first": {
"next": "https://fosstodon.org/users/manfre/statuses/109519951621804608/replies?only_other_accounts=true&page=true",
"next": "https://remote.test/test-actor/109519951621804608/replies?only_other_accounts=true&page=true",
"type": "CollectionPage",
"items": [],
"partOf": "https://fosstodon.org/users/manfre/statuses/109519951621804608/replies",
"partOf": "https://remote.test/test-actor/109519951621804608/replies",
},
},
"published": "2022-12-15T22:03:59Z",
@ -50,15 +50,9 @@ def test_question_post(config_system, identity, remote_identity):
"en": '<p>This is a poll :python: </p><p><span class="h-card"><a href="https://ehakat.manfre.net/@mike/" class="u-url mention">@<span>mike</span></a></span></p>'
},
"as:sensitive": False,
"attributedTo": "https://fosstodon.org/users/manfre",
"http://ostatus.org#atomUri": "https://fosstodon.org/users/manfre/statuses/109519951621804608",
"http://ostatus.org#conversation": "tag:fosstodon.org,2022-12-15:objectId=69494364:objectType=Conversation",
"http://joinmastodon.org/ns#votersCount": 0,
"attributedTo": "https://remote.test/test-actor/",
"toot:votersCount": 0,
},
"@context": [
"https://www.w3.org/ns/activitystreams",
"https://w3id.org/security/v1",
],
"published": "2022-12-15T22:03:59Z",
}

View File

@ -6,7 +6,7 @@ from users.models import Identity
@pytest.mark.django_db
def test_post_context(identity: Identity):
def test_post_context(identity: Identity, config_system):
"""
Tests that post context fetching works correctly
"""

View File

@ -2,7 +2,7 @@ from datetime import timedelta
from django.utils import timezone
from activities.templatetags.activity_tags import linkify_hashtags, timedeltashort
from activities.templatetags.activity_tags import timedeltashort
def test_timedeltashort():
@ -22,17 +22,3 @@ def test_timedeltashort():
assert timedeltashort(value - timedelta(days=364)) == "364d"
assert timedeltashort(value - timedelta(days=365)) == "1y"
assert timedeltashort(value - timedelta(days=366)) == "1y"
def test_linkify_hashtags():
"""
Tests that linkify_hashtags works correctly
"""
assert linkify_hashtags(None) == ""
assert linkify_hashtags("") == ""
assert (
linkify_hashtags("#Takahe")
== '<a class="hashtag" href="/tags/takahe/">#Takahe</a>'
)

View File

@ -1,4 +1,4 @@
from core.html import html_to_plaintext, sanitize_post
from core.html import html_to_plaintext, sanitize_html
def test_html_to_plaintext():
@ -17,5 +17,5 @@ def test_html_to_plaintext():
def test_sanitize_post():
assert sanitize_post("<p>Hello!</p>") == "<p>Hello!</p>"
assert sanitize_post("<p>It&#39;s great</p>") == "<p>It&#39;s great</p>"
assert sanitize_html("<p>Hello!</p>") == "<p>Hello!</p>"
assert sanitize_html("<p>It&#39;s great</p>") == "<p>It&#39;s great</p>"

View File

@ -11,7 +11,7 @@ from django.utils import timezone
from django.utils.functional import lazy
from core.exceptions import ActorMismatchError
from core.html import sanitize_post, strip_html
from core.html import ContentRenderer, strip_html
from core.ld import (
canonicalise,
format_ld_date,
@ -192,20 +192,18 @@ class Identity(StatorModel):
@property
def safe_summary(self):
from activities.templatetags.emoji_tags import imageify_emojis
return imageify_emojis(sanitize_post(self.summary), self.domain)
return ContentRenderer(local=True).render_identity(self.summary, self)
@property
def safe_metadata(self):
from activities.templatetags.emoji_tags import imageify_emojis
renderer = ContentRenderer(local=True)
if not self.metadata:
return []
return [
{
"name": imageify_emojis(strip_html(data["name"]), self.domain),
"value": imageify_emojis(strip_html(data["value"]), self.domain),
"name": renderer.render_identity(data["name"], self, strip=True),
"value": renderer.render_identity(data["value"], self, strip=True),
}
for data in self.metadata
]
@ -279,9 +277,9 @@ class Identity(StatorModel):
"""
Return the name_or_handle with any HTML substitutions made
"""
from activities.templatetags.emoji_tags import imageify_emojis
return imageify_emojis(self.name_or_handle, self.domain)
return ContentRenderer(local=True).render_identity(
self.name_or_handle, self, strip=True
)
@property
def handle(self):