Apply Mastodon style link text shortening (#426)

This commit is contained in:
Michael Manfre 2023-01-16 13:59:46 -05:00 committed by GitHub
parent 54e7755080
commit 9b6ceee490
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 58 additions and 2 deletions

View File

@ -2,6 +2,7 @@ import re
from functools import partial
import bleach
import bleach.callbacks
from bleach.html5lib_shim import Filter
from bleach.linkifier import LinkifyFilter
from django.utils.safestring import mark_safe
@ -90,6 +91,35 @@ def allow_a(tag: str, name: str, value: str):
return False
def shorten_link_text(attrs, new=False):
    """
    Applies Mastodon's link shortening behavior where URL text links are
    shortened by removing the scheme and only showing the first 30 chars.

    Orig:
        <a>https://social.example.com/a-long/path/2023/01/16/that-should-be-shortened</a>

    Becomes:
        <a>social.example.com/a-long/path</a>

    Used as a linkify callback: ``attrs`` is keyed by ``(namespace, name)``
    tuples for HTML attributes, with the visible link text under ``"_text"``.
    ``new`` is accepted for the callback signature but is not used. The dict
    is modified in place and returned.

    The "ellipsis" class is only added when characters were actually cut off,
    so a URL that fits once its scheme is removed is not followed by a
    misleading "…".
    """
    max_length = 30

    text = attrs.get("_text")
    if not text:
        # No visible text on the link; fall back to the target URL
        text = attrs.get((None, "href"))

    if text and "://" in text and len(text) > max_length:
        # Drop everything up to and including the first "://"
        display_text = text.split("://", 1)[-1]

        # Only mark the link as truncated if the scheme-less text is really
        # longer than what will be shown
        if len(display_text) > max_length:
            attrs[(None, "class")] = " ".join(
                filter(None, [attrs.pop((None, "class"), ""), "ellipsis"])
            )

        # Add the full URL to the title for easier user inspection
        attrs[(None, "title")] = attrs.get((None, "href"))
        attrs["_text"] = display_text[:max_length]

    return attrs
# Callbacks handed to LinkifyFilter for each link, listed in this order:
# bleach's nofollow callback first, then the Mastodon-style text shortening.
linkify_callbacks = [bleach.callbacks.nofollow, shorten_link_text]
def sanitize_html(post_html: str) -> str:
"""
Only allows a, br, p and span tags, and class attributes.
@ -100,7 +130,10 @@ def sanitize_html(post_html: str) -> str:
"a": allow_a,
"p": ["class"],
},
filters=[partial(LinkifyFilter, url_re=url_regex), MastodonStrictTagFilter],
filters=[
partial(LinkifyFilter, url_re=url_regex, callbacks=linkify_callbacks),
MastodonStrictTagFilter,
],
strip=True,
)
return mark_safe(cleaner.clean(post_html))
@ -113,7 +146,9 @@ def strip_html(post_html: str, *, linkify: bool = True) -> str:
cleaner = bleach.Cleaner(
tags=[],
strip=True,
filters=[partial(LinkifyFilter, url_re=url_regex)] if linkify else [],
filters=[partial(LinkifyFilter, url_re=url_regex, callbacks=linkify_callbacks)]
if linkify
else [],
)
return mark_safe(cleaner.clean(post_html))

View File

@ -392,6 +392,10 @@ img.emoji {
height: 0.8em;
}
/* Shows a trailing "…" after elements with the "ellipsis" class, which the
   link shortening code adds to links whose visible URL text was cut short */
.ellipsis::after {
content: "…";
}
/* Generic markdown styling and sections */
.no-sidebar section {

View File

@ -37,6 +37,23 @@ def test_sanitize_post():
)
def test_shorten_url():
    """
    Bare URLs get shortened link text plus a title holding the full URL,
    while links with their own non-URL text are left as written.
    """
    full_url = (
        "https://social.example.com/a-long/path/2023/01/16/that-should-be-shortened"
    )

    # A bare URL is linkified and its visible text is shortened
    bare_rendered = sanitize_html(f"<p>{full_url}</p>")
    bare_expected = f'<p><a href="{full_url}" rel="nofollow" class="ellipsis" title="{full_url}">social.example.com/a-long/path</a></p>'
    assert bare_rendered == bare_expected

    # Long link text that is not itself a URL must not be touched
    labelled_rendered = sanitize_html(
        f'<p><a href="{full_url}">This is a long link text, but cannot be shortened as a URL</a></p>'
    )
    labelled_expected = f'<p><a href="{full_url}" rel="nofollow">This is a long link text, but cannot be shortened as a URL</a></p>'
    assert labelled_rendered == labelled_expected
@pytest.mark.django_db
def test_link_preservation():
"""