2023-11-19 10:32:35 -08:00
|
|
|
import json
|
|
|
|
|
2022-12-11 10:22:06 -08:00
|
|
|
import httpx
|
|
|
|
|
|
|
|
from activities.models import Hashtag, Post
|
|
|
|
from core.ld import canonicalise
|
|
|
|
from users.models import Domain, Identity, IdentityStates
|
|
|
|
from users.models.system_actor import SystemActor
|
|
|
|
|
|
|
|
|
2022-12-20 02:17:52 -08:00
|
|
|
class SearchService:
    """
    Captures the logic needed to search - reused in the UI and API

    A service instance wraps a single query string plus the identity on
    whose behalf the search runs (None when there is no such identity).
    Each search_* method inspects the query and short-circuits with an
    empty result when the query is obviously not of the kind it handles.
    """

    def __init__(self, query: str, identity: Identity | None):
        """
        Stores the whitespace-stripped query and the searching identity.
        """
        self.query = query.strip()
        self.identity = identity

    def _json(self, response: httpx.Response) -> dict | None:
        """
        Decodes the body of a response as JSON, if it declares a JSON type.

        Returns None when the Content-Type is not one of the accepted JSON
        media types, or when the body cannot be decoded or parsed (unknown
        charset, undecodable bytes, malformed JSON).
        """
        content_type, *parameters = (
            response.headers.get("Content-Type", "invalid").lower().split(";")
        )

        # Strip so that "application/json ; charset=..." is still accepted
        if content_type.strip() not in (
            "application/json",
            "application/ld+json",
            "application/activity+json",
        ):
            return None

        charset = None
        for parameter in parameters:
            # partition() tolerates empty or valueless parameters (such as
            # the one produced by a trailing ";") and values containing "=",
            # all of which break a two-way unpack of split("=")
            key, _, value = parameter.partition("=")
            if key.strip() == "charset":
                # Parameter values may be sent as a quoted string
                charset = value.strip().strip('"')

        try:
            if charset:
                return json.loads(response.content.decode(charset))
            # if no charset informed, default to
            # httpx json encoding inference
            return response.json()
        except (LookupError, ValueError):
            # LookupError: unknown codec name. ValueError covers both
            # UnicodeDecodeError and json.JSONDecodeError.
            return None

    def search_identities_handle(self) -> set[Identity]:
        """
        Searches for identities by their handles

        Leading "@" characters are ignored. A query of the form
        "username@domain" looks up that one identity; a bare username
        matches exact usernames plus case-insensitive prefixes. URL-like
        and empty queries yield an empty set.
        """
        # Short circuit if it's obviously not for us
        if "://" in self.query:
            return set()

        handle = self.query.lstrip("@")
        # An empty handle would otherwise prefix-match arbitrary identities
        if not handle:
            return set()

        results: set[Identity] = set()

        if "@" not in handle:
            # Bare username: exact matches, then case-insensitive prefix
            # matches, each query capped at 20 rows
            results.update(Identity.objects.filter(username=handle)[:20])
            results.update(
                Identity.objects.filter(username__istartswith=handle)[:20]
            )
            return results

        username, domain = handle.split("@", 1)

        # Resolve the domain to the display domain
        domain_instance = Domain.get_domain(domain)
        try:
            if domain_instance is None:
                raise Identity.DoesNotExist()
            identity = Identity.objects.get(
                domain=domain_instance,
                username__iexact=username,
            )
        except Identity.DoesNotExist:
            identity = None
            if self.identity is not None:
                try:
                    # Allow authenticated users to fetch remote
                    identity = Identity.by_username_and_domain(
                        username, domain_instance or domain, fetch=True
                    )
                    if identity and identity.state == IdentityStates.outdated:
                        identity.fetch_actor()
                except ValueError:
                    # Deliberately best-effort: a ValueError from the lookup
                    # or refresh is ignored and whatever was found so far
                    # (possibly nothing) is kept
                    pass

        if identity:
            results.add(identity)
        return results

    def search_url(self) -> Post | Identity | None:
        """
        Searches for an identity or post by URL.

        Fetches the query URL, and returns the matching Identity or Post
        depending on the "type" of the document that comes back. Returns
        None when the query is not a URL, the request fails or returns a
        status of 400 or above, the response is not usable JSON, or the
        document has no usable "type"/"id".
        """
        # Short circuit if it's obviously not for us
        if "://" not in self.query:
            return None

        # Fetch the provided URL as the system actor to retrieve the AP JSON
        try:
            response = SystemActor().signed_request(
                method="get",
                uri=self.query,
            )
        except httpx.RequestError:
            return None
        if response.status_code >= 400:
            return None

        json_data = self._json(response)
        if not json_data:
            return None

        document = canonicalise(json_data, include_security=True)

        # The document comes from a remote server, so do not assume that
        # "type" is a string or that "id" is present
        object_type = document.get("type", "unknown")
        object_uri = document.get("id")
        if not isinstance(object_type, str) or not object_uri:
            return None
        object_type = object_type.lower()

        # Is it an identity?
        if object_type in Identity.ACTOR_TYPES:
            # Try and retrieve the profile by actor URI
            identity = Identity.by_actor_uri(object_uri, create=True)
            if identity and identity.state == IdentityStates.outdated:
                identity.fetch_actor()
            return identity

        # Is it a post?
        if object_type in {value.lower() for value in Post.Types.values}:
            # Try and retrieve the post by URI
            # (we do not trust the JSON we just got - fetch from source!)
            try:
                return Post.by_object_uri(object_uri, fetch=True)
            except Post.DoesNotExist:
                return None

        # Dunno what it is
        return None

    def search_hashtags(self) -> set[Hashtag]:
        """
        Searches for hashtags by their name

        Leading "#" characters are ignored and the name is lower-cased.
        Handle-like, URL-like and empty queries yield an empty set.
        """
        # Short circuit out if it's obviously not a hashtag
        if "@" in self.query or "://" in self.query:
            return set()

        name = self.query.lstrip("#").lower()
        # An empty name would otherwise prefix-match arbitrary hashtags
        if not name:
            return set()

        results: set[Hashtag] = set()
        # Exact/alias matches, then prefix matches, each capped at 10 rows
        results.update(Hashtag.objects.public().hashtag_or_alias(name)[:10])
        results.update(
            Hashtag.objects.public().filter(hashtag__startswith=name)[:10]
        )
        return results

    def search_post_content(self):
        """
        Searches for posts on an identity via full text search

        Returns at most 50 of the identity's posts whose content matches
        the query.

        NOTE(review): this dereferences self.identity unconditionally, so
        it raises AttributeError when the service was built with
        identity=None - callers must only use it with an identity.
        """
        return self.identity.posts.unlisted(include_replies=True).filter(
            content__search=self.query
        )[:50]

    def search_all(self):
        """
        Returns all possible results for a search

        The result is a dict with the keys "identities", "hashtags" and
        "posts", each mapping to a set. Post content search is not
        included.
        """
        results = {
            "identities": self.search_identities_handle(),
            "hashtags": self.search_hashtags(),
            "posts": set(),
        }
        # A URL lookup yields at most one object; file it under its kind
        url_result = self.search_url()
        if isinstance(url_result, Identity):
            results["identities"].add(url_result)
        elif isinstance(url_result, Post):
            results["posts"].add(url_result)
        return results
|