def shorten_link_text(attrs, new=False):
    """
    Applies Mastodon's link shortening behavior where URL text links are
    shortened by removing the scheme and only showing the first 30 chars.

    Orig:
        https://social.example.com/a-long/path/2023/01/16/that-should-be-shortened

    Becomes:
        social.example.com/a-long/path

    ``attrs`` is the link attribute dict: attributes are keyed by
    ``(namespace, name)`` tuples (e.g. ``(None, "href")``) and the link's
    visible text lives under the special ``"_text"`` key. The dict is
    modified in place and also returned.

    ``new`` is accepted so the function matches the two-argument callback
    signature; it is not used here.

    Links are only touched when their text (or, if the text is empty, their
    href) contains ``://`` and is longer than 30 characters. For those:

    * the scheme is removed and the text is cut to 30 characters;
    * the ``ellipsis`` class is appended to any existing class, but only
      when characters were actually cut off after the scheme was removed;
    * the full href is copied into ``title``, if there is an href.
    """
    max_length = 30

    text = attrs.get("_text") or attrs.get((None, "href"))
    if not text or "://" not in text or len(text) <= max_length:
        return attrs

    display = text.split("://", 1)[-1]
    attrs["_text"] = display[:max_length]

    # The length guard above counts the scheme, so the scheme-less text can
    # still fit. Only flag the link as truncated when something was dropped,
    # otherwise a complete URL would be rendered with a trailing ellipsis.
    if len(display) > max_length:
        existing = attrs.pop((None, "class"), "")
        attrs[(None, "class")] = " ".join(filter(None, [existing, "ellipsis"]))

    # Add the full URL in to title for easier user inspection. Skip it when
    # there is no href so we never emit a title whose value is None.
    href = attrs.get((None, "href"))
    if href:
        attrs[(None, "title")] = href

    return attrs
@@ -100,7 +130,10 @@ def sanitize_html(post_html: str) -> str: "a": allow_a, "p": ["class"], }, - filters=[partial(LinkifyFilter, url_re=url_regex), MastodonStrictTagFilter], + filters=[ + partial(LinkifyFilter, url_re=url_regex, callbacks=linkify_callbacks), + MastodonStrictTagFilter, + ], strip=True, ) return mark_safe(cleaner.clean(post_html)) @@ -113,7 +146,9 @@ def strip_html(post_html: str, *, linkify: bool = True) -> str: cleaner = bleach.Cleaner( tags=[], strip=True, - filters=[partial(LinkifyFilter, url_re=url_regex)] if linkify else [], + filters=[partial(LinkifyFilter, url_re=url_regex, callbacks=linkify_callbacks)] + if linkify + else [], ) return mark_safe(cleaner.clean(post_html)) diff --git a/static/css/style.css b/static/css/style.css index 6138b31..45b69ed 100644 --- a/static/css/style.css +++ b/static/css/style.css @@ -392,6 +392,10 @@ img.emoji { height: 0.8em; } +.ellipsis::after { + content: "…"; +} + /* Generic markdown styling and sections */ .no-sidebar section { diff --git a/tests/core/test_html.py b/tests/core/test_html.py index 3926093..bc63920 100644 --- a/tests/core/test_html.py +++ b/tests/core/test_html.py @@ -37,6 +37,23 @@ def test_sanitize_post(): ) +def test_shorten_url(): + full_url = ( + "https://social.example.com/a-long/path/2023/01/16/that-should-be-shortened" + ) + assert ( + sanitize_html(f"
{full_url}
") + == f'social.example.com/a-long/path
' + ) + + assert ( + sanitize_html( + f'' + ) + == f'' + ) + + @pytest.mark.django_db def test_link_preservation(): """