Correctly handle GTS mentions of same username

Fixes #546
This commit is contained in:
Andrew Godwin 2023-03-22 10:43:51 -06:00
parent c702b1b24d
commit 1994671541
2 changed files with 25 additions and 2 deletions

View File

@ -98,6 +98,7 @@ class FediverseHtmlParser(HTMLParser):
domain = mention.domain_id.lower() domain = mention.domain_id.lower()
self.mention_matches[f"{username}"] = url self.mention_matches[f"{username}"] = url
self.mention_matches[f"{username}@{domain}"] = url self.mention_matches[f"{username}@{domain}"] = url
self.mention_matches[mention.absolute_profile_uri()] = url
def handle_starttag(self, tag: str, attrs: list[tuple[str, str | None]]) -> None: def handle_starttag(self, tag: str, attrs: list[tuple[str, str | None]]) -> None:
if tag in self.REWRITE_TO_P: if tag in self.REWRITE_TO_P:
@ -126,7 +127,7 @@ class FediverseHtmlParser(HTMLParser):
has_ellipsis = "ellipsis" in self._pending_a["attrs"].get("class", "") has_ellipsis = "ellipsis" in self._pending_a["attrs"].get("class", "")
# Is it a mention? # Is it a mention?
if content.lower().lstrip("@") in self.mention_matches: if content.lower().lstrip("@") in self.mention_matches:
self.html_output += self.create_mention(content) self.html_output += self.create_mention(content, href)
self.text_output += content self.text_output += content
# Is it a hashtag? # Is it a hashtag?
elif self.HASHTAG_REGEX.match(content): elif self.HASHTAG_REGEX.match(content):
@ -172,7 +173,7 @@ class FediverseHtmlParser(HTMLParser):
else: else:
return f'<a href="{html.escape(href)}" rel="nofollow">{html.escape(content)}</a>' return f'<a href="{html.escape(href)}" rel="nofollow">{html.escape(content)}</a>'
def create_mention(self, handle) -> str: def create_mention(self, handle, href: str | None = None) -> str:
""" """
Generates a mention link. Handle should have a leading @. Generates a mention link. Handle should have a leading @.
@ -187,6 +188,9 @@ class FediverseHtmlParser(HTMLParser):
short_hash = short_handle.lower() short_hash = short_handle.lower()
self.mentions.add(handle_hash) self.mentions.add(handle_hash)
url = self.mention_matches.get(handle_hash) url = self.mention_matches.get(handle_hash)
# If we have a captured link out, use that as the actual resolver
if href and href in self.mention_matches:
url = self.mention_matches[href]
if url: if url:
if short_hash not in self.mention_aliases: if short_hash not in self.mention_aliases:
self.mention_aliases[short_hash] = handle_hash self.mention_aliases[short_hash] = handle_hash

View File

@ -115,3 +115,22 @@ def test_parser(identity):
) )
assert parser.html == "<p>List:</p><p>One<br>Two<br>Three</p><p>End!</p>" assert parser.html == "<p>List:</p><p>One<br>Two<br>Three</p><p>End!</p>"
assert parser.plain_text == "List:\n\nOne\nTwo\nThree\n\nEnd!" assert parser.plain_text == "List:\n\nOne\nTwo\nThree\n\nEnd!"
@pytest.mark.django_db
def test_parser_same_name_mentions(remote_identity, remote_identity2):
    """
    Two mentions whose visible text is the same "@test" but whose hrefs
    differ must each be rewritten to their own, distinct local link.
    """
    # Both anchors render as "@test"; only the href tells them apart.
    source_html = '<span class="h-card"><a href="https://remote.test/@test/" class="u-url mention" rel="nofollow noreferrer noopener" target="_blank">@<span>test</span></a></span> <span class="h-card"><a href="https://remote2.test/@test/" class="u-url mention" rel="nofollow noreferrer noopener" target="_blank">@<span>test</span></a></span>'
    expected_html = '<a href="/@test@remote.test/">@test</a> <a href="/@test@remote2.test/">@test</a>'
    expected_text = "@test @test"

    parsed = FediverseHtmlParser(
        source_html,
        mentions=[remote_identity, remote_identity2],
        find_hashtags=True,
        find_emojis=True,
    )

    # The HTML output keeps the two mentions pointing at different identities.
    assert parsed.html == expected_html
    # The plain-text rendering keeps the handles exactly as they were written.
    assert parsed.plain_text == expected_text