Extract json parser to core and use in fetch_actor (#663)

This commit is contained in:
Humberto Rocha 2023-11-20 13:46:51 -05:00 committed by GitHub
parent 81d019ad0d
commit b031880e41
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 41 additions and 31 deletions

View File

@ -1,8 +1,7 @@
import json
import httpx
from activities.models import Hashtag, Post
from core.json import json_from_response
from core.ld import canonicalise
from users.models import Domain, Identity, IdentityStates
from users.models.system_actor import SystemActor
@ -17,32 +16,6 @@ class SearchService:
self.query = query.strip()
self.identity = identity
def _json(self, response: httpx.Response) -> dict | None:
content_type, *parameters = (
response.headers.get("Content-Type", "invalid").lower().split(";")
)
if content_type not in [
"application/json",
"application/ld+json",
"application/activity+json",
]:
return None
charset = None
for parameter in parameters:
key, value = parameter.split("=")
if key.strip() == "charset":
charset = value.strip()
if charset:
return json.loads(response.content.decode(charset))
else:
# if no charset informed, default to
# httpx json encoding inference
return response.json()
def search_identities_handle(self) -> set[Identity]:
"""
Searches for identities by their handles
@ -110,7 +83,7 @@ class SearchService:
if response.status_code >= 400:
return None
json_data = self._json(response)
json_data = json_from_response(response)
if not json_data:
return None

32
core/json.py Normal file
View File

@ -0,0 +1,32 @@
import json
from httpx import Response
# Media types that are accepted as carrying a JSON document
JSON_CONTENT_TYPES = [
    "application/json",
    "application/ld+json",
    "application/activity+json",
]


def json_from_response(response: Response) -> dict | None:
    """
    Parses the body of a response as JSON, honouring its declared charset

    Returns None when the Content-Type header is missing or is not listed
    in JSON_CONTENT_TYPES. Otherwise returns the decoded document. Invalid
    JSON, or a body that cannot be decoded with the declared charset, still
    raises ValueError as before.
    """
    content_type, *parameters = (
        response.headers.get("Content-Type", "invalid").lower().split(";")
    )
    # Tolerate optional whitespace around the media type
    # ("application/json ; charset=utf-8")
    if content_type.strip() not in JSON_CONTENT_TYPES:
        return None

    charset = None
    for parameter in parameters:
        # partition() never fails to unpack, unlike split("="): it copes
        # with a trailing ";", a flag with no "=" and values containing "="
        key, _, value = parameter.partition("=")
        if key.strip() == "charset":
            # Parameter values may be quoted, e.g. charset="utf-8"
            charset = value.strip().strip("\"'")

    if charset:
        try:
            return json.loads(response.content.decode(charset))
        except LookupError:
            # Unknown charset label sent by the remote server;
            # fall through to the inference below
            pass

    # if no usable charset informed, default to
    # httpx json for encoding inference
    return response.json()

View File

@ -44,7 +44,7 @@ test_account_json = r"""
"featuredTags":"https://search.example.com/users/searchtest/collections/tags",
"preferredUsername":"searchtest",
"name":"searchtest",
"summary":"<p>The official searchtest account for the instance.</p>",
"summary":"<p>Just a test (àáâãäåæ)</p>",
"url":"https://search.example.com/@searchtest",
"manuallyApprovesFollowers":false,
"discoverable":true,
@ -113,3 +113,4 @@ def test_search(
assert len(response["accounts"]) == 1
assert response["accounts"][0]["acct"] == "searchtest@search.example.com"
assert response["accounts"][0]["username"] == "searchtest"
assert response["accounts"][0]["note"] == "<p>Just a test (àáâãäåæ)</p>"

View File

@ -14,6 +14,7 @@ from lxml import etree
from core.exceptions import ActorMismatchError
from core.html import ContentRenderer, FediverseHtmlParser
from core.json import json_from_response
from core.ld import (
canonicalise,
format_ld_date,
@ -878,8 +879,11 @@ class Identity(StatorModel):
"Client error fetching actor: %d %s", status_code, self.actor_uri
)
return False
json_data = json_from_response(response)
if not json_data:
return False
try:
document = canonicalise(response.json(), include_security=True)
document = canonicalise(json_data, include_security=True)
except ValueError:
# servers with empty or invalid responses are inevitable
logger.info(