# takahe/activities/services/search.py
import httpx
from activities.models import Hashtag, Post
from core.ld import canonicalise
from users.models import Domain, Identity, IdentityStates
from users.models.system_actor import SystemActor
class SearchService:
    """
    Captures the logic needed to search - reused in the UI and API
    """

    # Media types that search_url() accepts as a JSON document. Compared
    # against the header's media type only, with any parameters removed.
    _JSON_CONTENT_TYPES = frozenset(
        {
            "application/json",
            "application/ld+json",
            "application/activity+json",
        }
    )

    def __init__(self, query: str, identity: Identity | None):
        """
        Stores the whitespace-stripped query and the identity (if any) the
        search is being performed on behalf of.
        """
        self.query = query.strip()
        self.identity = identity

    @classmethod
    def _is_json_content_type(cls, header_value: str) -> bool:
        """
        Returns True if a Content-Type header value names one of the accepted
        JSON media types.

        Only the part before the first ";" is compared, so parameters such as
        "; charset=utf-8" or "; profile=..." do not cause a rejection.
        """
        media_type = header_value.split(";", 1)[0].strip().lower()
        return media_type in cls._JSON_CONTENT_TYPES

    def search_identities_handle(self) -> set[Identity]:
        """
        Searches for identities by their handles

        A query of the form "user@domain" (a leading "@" is ignored) is looked
        up as a single identity; anything else is matched against usernames.
        Returns an empty set when the query looks like a URL.
        """
        # Short circuit if it's obviously not for us
        if "://" in self.query:
            return set()

        # Try to fetch the user by handle
        handle = self.query.lstrip("@")
        results: set[Identity] = set()

        if "@" not in handle:
            # Bare username: exact matches plus case-insensitive prefix
            # matches, each capped at 20 rows.
            results.update(Identity.objects.filter(username=handle)[:20])
            results.update(
                Identity.objects.filter(username__istartswith=handle)[:20]
            )
            return results

        username, domain = handle.split("@", 1)
        # Resolve the domain to the display domain
        domain_instance = Domain.get_domain(domain)
        try:
            if domain_instance is None:
                raise Identity.DoesNotExist()
            identity = Identity.objects.get(
                domain=domain_instance,
                username__iexact=username,
            )
        except Identity.DoesNotExist:
            identity = None
            if self.identity is not None:
                try:
                    # Allow authenticated users to fetch remote
                    identity = Identity.by_username_and_domain(
                        username, domain_instance or domain, fetch=True
                    )
                    if identity and identity.state == IdentityStates.outdated:
                        identity.fetch_actor()
                except ValueError:
                    # Best effort: a ValueError from the lookup/fetch is
                    # treated the same as "no such identity".
                    pass

        if identity:
            results.add(identity)
        return results

    def search_url(self) -> Post | Identity | None:
        """
        Searches for an identity or post by URL.

        Returns None when the query is not a URL, the request fails or returns
        a status >= 400, the response is not an accepted JSON media type or is
        not parseable JSON, or the document is neither an actor nor a post.
        """
        # Short circuit if it's obviously not for us
        if "://" not in self.query:
            return None

        # Fetch the provided URL as the system actor to retrieve the AP JSON
        try:
            response = SystemActor().signed_request(
                method="get",
                uri=self.query,
            )
        except httpx.RequestError:
            return None
        if response.status_code >= 400:
            return None
        if not self._is_json_content_type(response.headers.get("Content-Type", "")):
            return None

        # The body comes from a remote server, so it may not be valid JSON
        # even when the header claims it is.
        try:
            payload = response.json()
        except ValueError:
            return None

        document = canonicalise(payload, include_security=True)
        document_type = document.get("type", "unknown")
        document_id = document.get("id")
        # Without a single string type and an id there is nothing to look up.
        if not isinstance(document_type, str) or not document_id:
            return None
        document_type = document_type.lower()

        # Is it an identity?
        if document_type in Identity.ACTOR_TYPES:
            # Try and retrieve the profile by actor URI
            identity = Identity.by_actor_uri(document_id, create=True)
            if identity and identity.state == IdentityStates.outdated:
                identity.fetch_actor()
            return identity

        # Is it a post?
        if document_type in {value.lower() for value in Post.Types.values}:
            # Try and retrieve the post by URI
            # (we do not trust the JSON we just got - fetch from source!)
            try:
                return Post.by_object_uri(document_id, fetch=True)
            except Post.DoesNotExist:
                return None

        # Dunno what it is
        return None

    def search_hashtags(self) -> set[Hashtag]:
        """
        Searches for hashtags by their name

        Leading "#" characters are ignored and the name is lowercased. Returns
        an empty set when the query looks like a handle or a URL.
        """
        # Short circuit out if it's obviously not a hashtag
        if "@" in self.query or "://" in self.query:
            return set()

        name = self.query.lstrip("#").lower()
        results: set[Hashtag] = set()
        # Name/alias matches first, then prefix matches; each capped at 10.
        results.update(Hashtag.objects.public().hashtag_or_alias(name)[:10])
        results.update(
            Hashtag.objects.public().filter(hashtag__startswith=name)[:10]
        )
        return results

    def search_post_content(self):
        """
        Searches for posts on an identity via full text search

        Returns at most 50 posts, or an empty queryset when the service was
        created without an identity.
        """
        if self.identity is None:
            # No identity means there are no posts to search through.
            return Post.objects.none()
        return self.identity.posts.unlisted(include_replies=True).filter(
            content__search=self.query
        )[:50]

    def search_all(self):
        """
        Returns all possible results for a search

        The result is a dict with "identities", "hashtags" and "posts" sets;
        a URL match is added to whichever set fits its type.
        """
        results = {
            "identities": self.search_identities_handle(),
            "hashtags": self.search_hashtags(),
            "posts": set(),
        }
        url_result = self.search_url()
        if isinstance(url_result, Identity):
            results["identities"].add(url_result)
        if isinstance(url_result, Post):
            results["posts"].add(url_result)
        return results