Allow Unicode characters in hashtags (#659)
This commit is contained in:
parent
b122e2beda
commit
5267e4108c
|
@ -38,7 +38,7 @@ class FediverseHtmlParser(HTMLParser):
|
|||
r"(^|[^\w\d\-_/])@([\w\d\-_]+(?:@[\w\d\-_\.]+[\w\d\-_]+)?)"
|
||||
)
|
||||
|
||||
HASHTAG_REGEX = re.compile(r"\B#([a-zA-Z0-9(_)]+\b)(?!;)")
|
||||
HASHTAG_REGEX = re.compile(r"\B#([\w()]+\b)(?!;)")
|
||||
|
||||
EMOJI_REGEX = re.compile(r"\B:([a-zA-Z0-9(_)-]+):\B")
|
||||
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
import pytest
|
||||
from django.template.defaultfilters import linebreaks_filter
|
||||
|
||||
from core.html import FediverseHtmlParser
|
||||
|
||||
|
@ -101,6 +102,16 @@ def test_parser(identity):
|
|||
assert parser.plain_text == "@TeSt@ExamPle.com"
|
||||
assert parser.mentions == {"test@example.com"}
|
||||
|
||||
# Ensure hashtags are parsed and linkified in local posts
|
||||
parser = FediverseHtmlParser(
|
||||
linebreaks_filter("#tag1-x,#tag2 #标签。"), find_hashtags=True
|
||||
)
|
||||
assert (
|
||||
parser.html
|
||||
== '<p><a href="/tags/tag1/" rel="tag">#tag1</a>-x,<a href="/tags/tag2/" rel="tag">#tag2</a> <a href="/tags/标签/" rel="tag">#标签</a>。</p>'
|
||||
)
|
||||
assert parser.hashtags == {"tag1", "tag2", "标签"}
|
||||
|
||||
# Ensure hashtags are linked, even through spans, but not within hrefs
|
||||
parser = FediverseHtmlParser(
|
||||
'<a href="http://example.com#notahashtag">something</a> <span>#</span>hashtag <a href="https://example.com/tags/hashtagtwo/">#hashtagtwo</a>',
|
||||
|
|
Loading…
Reference in New Issue