2022-12-20 05:55:14 -08:00
|
|
|
import pytest
|
|
|
|
|
2023-01-29 16:46:22 -08:00
|
|
|
from core.html import FediverseHtmlParser
|
2022-11-27 11:09:08 -08:00
|
|
|
|
|
|
|
|
2023-01-29 16:46:22 -08:00
|
|
|
@pytest.mark.django_db
def test_parser(identity):
    """
    Exercises FediverseHtmlParser across a range of inputs, checking its
    html, plain_text, mentions and hashtags outputs.

    The ``identity`` fixture is defined outside this file; the mention
    assertions below expect it to correspond to test@example.com
    (assumption - confirm against the fixture definition).
    """

    # Permitted tags pass through, <script> is dropped
    parsed = FediverseHtmlParser("<p>Hello!</p><script></script>")
    assert parsed.html == "<p>Hello!</p>"
    assert parsed.plain_text == "Hello!"

    # Newlines between paragraphs vanish from the HTML output
    parsed = FediverseHtmlParser("<p>Hi!</p>\n\n<p>How are you?</p>")
    assert parsed.html == "<p>Hi!</p><p>How are you?</p>"
    assert parsed.plain_text == "Hi!\n\nHow are you?"

    # Attempts to smuggle a script tag through must not succeed
    for hostile_input in ("<scri<span></span>pt>", "<scri #hashtag pt>"):
        parsed = FediverseHtmlParser(hostile_input)
        assert "<scr" not in parsed.html

    # Entities are escaped
    # NOTE(review): the input below holds a literal apostrophe rather than an
    # HTML entity - confirm this is the intended input for an escaping check.
    parsed = FediverseHtmlParser("<p>It's great</p>", find_hashtags=True)
    assert parsed.html == "<p>It's great</p>"
    assert parsed.plain_text == "It's great"
    assert parsed.hashtags == set()

    # A bare domain is not linkified; one with a protocol prefix is
    parsed = FediverseHtmlParser("<p>test.com</p>")
    assert parsed.html == "<p>test.com</p>"
    assert parsed.plain_text == "test.com"
    parsed = FediverseHtmlParser("<p>https://test.com</p>")
    linkified_html = '<p><a href="https://test.com" rel="nofollow">test.com</a></p>'
    assert parsed.html == linkified_html
    assert parsed.plain_text == "https://test.com"

    # Existing anchors are kept
    parsed = FediverseHtmlParser("<a href='https://takahe.social'>takahe social</a>")
    anchor_html = '<a href="https://takahe.social" rel="nofollow">takahe social</a>'
    assert parsed.html == anchor_html
    assert parsed.plain_text == "https://takahe.social"

    # Long URLs get a shortened display text
    full_url = "https://social.example.com/a-long/path/that-should-be-shortened"
    parsed = FediverseHtmlParser(f"<p>{full_url}</p>")
    shortened_html = f'<p><a href="{full_url}" rel="nofollow" class="ellipsis" title="{full_url.removeprefix("https://")}">social.example.com/a-long/path</a></p>'
    assert parsed.html == shortened_html
    long_plain_text = "https://social.example.com/a-long/path/that-should-be-shortened"
    assert parsed.plain_text == long_plain_text

    # Mention-shaped text stays unlinked when no mentions are passed in
    mention_markup = "<p>@test@example.com</p>"
    parsed = FediverseHtmlParser(
        mention_markup,
        find_mentions=True,
        find_hashtags=True,
        find_emojis=True,
    )
    assert parsed.html == "<p>@test@example.com</p>"
    assert parsed.plain_text == "@test@example.com"
    assert parsed.mentions == {"test@example.com"}

    # With a mention passed in, the same text becomes a link
    parsed = FediverseHtmlParser(
        mention_markup,
        mentions=[identity],
        find_hashtags=True,
        find_emojis=True,
    )
    assert parsed.html == '<p><a href="/@test@example.com/">@test</a></p>'
    assert parsed.plain_text == "@test@example.com"
    assert parsed.mentions == {"test@example.com"}

    # Mention matching ignores letter case
    parsed = FediverseHtmlParser(
        "<p>@TeSt@ExamPle.com</p>",
        mentions=[identity],
        find_hashtags=True,
        find_emojis=True,
    )
    assert parsed.html == '<p><a href="/@test@example.com/">@TeSt</a></p>'
    assert parsed.plain_text == "@TeSt@ExamPle.com"
    assert parsed.mentions == {"test@example.com"}

    # Hashtags get linked, even when split by a span, but not inside an href
    hashtag_markup = '<a href="http://example.com#notahashtag">something</a> <span>#</span>hashtag <a href="https://example.com/tags/hashtagtwo/">#hashtagtwo</a>'
    parsed = FediverseHtmlParser(
        hashtag_markup,
        find_hashtags=True,
        find_emojis=True,
    )
    hashtag_html = '<a href="http://example.com#notahashtag" rel="nofollow">something</a> <a href="/tags/hashtag/" rel="tag">#hashtag</a> <a href="/tags/hashtagtwo/" rel="tag">#hashtagtwo</a>'
    assert parsed.html == hashtag_html
    assert parsed.plain_text == "http://example.com#notahashtag #hashtag #hashtagtwo"
    assert parsed.hashtags == {"hashtag", "hashtagtwo"}

    # Lists come out as line-broken paragraphs
    list_markup = "<p>List:</p><ul><li>One</li><li>Two</li><li>Three</li></ul><p>End!</p>"
    parsed = FediverseHtmlParser(
        list_markup,
        find_hashtags=True,
        find_emojis=True,
    )
    assert parsed.html == "<p>List:</p><p>One<br>Two<br>Three</p><p>End!</p>"
    assert parsed.plain_text == "List:\n\nOne\nTwo\nThree\n\nEnd!"
|