Fix hashtag extraction

This commit is contained in:
Andrew Godwin 2022-11-29 09:44:22 -07:00
parent 17107618a0
commit 8139ccffdb
2 changed files with 4 additions and 2 deletions

View File

@@ -176,7 +176,7 @@ class Hashtag(StatorModel):
leading '#'.
"""
hashtag_hits = cls.hashtag_regex.findall(strip_html(content))
hashtags = sorted({tag[1].lower() for tag in hashtag_hits})
hashtags = sorted({tag.lower() for tag in hashtag_hits})
return list(hashtags)
@classmethod

View File

@@ -9,11 +9,13 @@ def test_hashtag_from_content():
"with",
]
assert Hashtag.hashtags_from_content("#hashtag.") == ["hashtag"]
assert Hashtag.hashtags_from_content("More text\n#one # two ##three #hashtag;") == [
assert Hashtag.hashtags_from_content("More text\n#one # two ##three #hashtag!") == [
"hashtag",
"one",
"three",
]
assert Hashtag.hashtags_from_content("my #html loves   entities") == ["html"]
assert Hashtag.hashtags_from_content("<span class='hash'>#</span>tag") == ["tag"]
def test_linkify_hashtag():