From b031880e4193001eb945052d24d6df5c2a5f9882 Mon Sep 17 00:00:00 2001 From: Humberto Rocha Date: Mon, 20 Nov 2023 13:46:51 -0500 Subject: [PATCH] Extract json parser to core and use in fetch_actor (#663) --- activities/services/search.py | 31 ++----------------------------- core/json.py | 32 ++++++++++++++++++++++++++++++++ tests/api/test_search.py | 3 ++- users/models/identity.py | 6 +++++- 4 files changed, 41 insertions(+), 31 deletions(-) create mode 100644 core/json.py diff --git a/activities/services/search.py b/activities/services/search.py index 4807f24..8ed05e6 100644 --- a/activities/services/search.py +++ b/activities/services/search.py @@ -1,8 +1,7 @@ -import json - import httpx from activities.models import Hashtag, Post +from core.json import json_from_response from core.ld import canonicalise from users.models import Domain, Identity, IdentityStates from users.models.system_actor import SystemActor @@ -17,32 +16,6 @@ class SearchService: self.query = query.strip() self.identity = identity - def _json(self, response: httpx.Response) -> dict | None: - content_type, *parameters = ( - response.headers.get("Content-Type", "invalid").lower().split(";") - ) - - if content_type not in [ - "application/json", - "application/ld+json", - "application/activity+json", - ]: - return None - - charset = None - - for parameter in parameters: - key, value = parameter.split("=") - if key.strip() == "charset": - charset = value.strip() - - if charset: - return json.loads(response.content.decode(charset)) - else: - # if no charset informed, default to - # httpx json encoding inference - return response.json() - def search_identities_handle(self) -> set[Identity]: """ Searches for identities by their handles @@ -110,7 +83,7 @@ class SearchService: if response.status_code >= 400: return None - json_data = self._json(response) + json_data = json_from_response(response) if not json_data: return None diff --git a/core/json.py b/core/json.py new file mode 100644 index 
0000000..1a71e97 --- /dev/null +++ b/core/json.py @@ -0,0 +1,32 @@ +import json + +from httpx import Response + +JSON_CONTENT_TYPES = [ + "application/json", + "application/ld+json", + "application/activity+json", +] + + +def json_from_response(response: Response) -> dict | None: + content_type, *parameters = ( + response.headers.get("Content-Type", "invalid").lower().split(";") + ) + + if content_type not in JSON_CONTENT_TYPES: + return None + + charset = None + + for parameter in parameters: + key, value = parameter.split("=") + if key.strip() == "charset": + charset = value.strip() + + if charset: + return json.loads(response.content.decode(charset)) + else: + # if no charset informed, default to + # httpx json for encoding inference + return response.json() diff --git a/tests/api/test_search.py b/tests/api/test_search.py index 0cbf825..389535f 100644 --- a/tests/api/test_search.py +++ b/tests/api/test_search.py @@ -44,7 +44,7 @@ test_account_json = r""" "featuredTags":"https://search.example.com/users/searchtest/collections/tags", "preferredUsername":"searchtest", "name":"searchtest", - "summary":"

<p>The official searchtest account for the instance.</p>

", + "summary":"

<p>Just a test (àáâãäåæ)</p>

", "url":"https://search.example.com/@searchtest", "manuallyApprovesFollowers":false, "discoverable":true, @@ -113,3 +113,4 @@ def test_search( assert len(response["accounts"]) == 1 assert response["accounts"][0]["acct"] == "searchtest@search.example.com" assert response["accounts"][0]["username"] == "searchtest" + assert response["accounts"][0]["note"] == "

<p>Just a test (àáâãäåæ)</p>

" diff --git a/users/models/identity.py b/users/models/identity.py index 9e80226..63edc48 100644 --- a/users/models/identity.py +++ b/users/models/identity.py @@ -14,6 +14,7 @@ from lxml import etree from core.exceptions import ActorMismatchError from core.html import ContentRenderer, FediverseHtmlParser +from core.json import json_from_response from core.ld import ( canonicalise, format_ld_date, @@ -878,8 +879,11 @@ class Identity(StatorModel): "Client error fetching actor: %d %s", status_code, self.actor_uri ) return False + json_data = json_from_response(response) + if not json_data: + return False try: - document = canonicalise(response.json(), include_security=True) + document = canonicalise(json_data, include_security=True) except ValueError: # servers with empty or invalid responses are inevitable logger.info(