Fix canonicalize (#590)

This commit is contained in:
Humberto Rocha 2023-06-24 10:53:42 -04:00 committed by GitHub
parent 9038e498d5
commit 226a60bec7
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 106 additions and 23 deletions

View File

@ -547,6 +547,18 @@ schemas = {
} }
}, },
}, },
"schema.org": {
"contentType": "application/ld+json",
"documentUrl": "https://schema.org/docs/jsonldcontext.json",
"contextUrl": None,
"document": {
"@context": {
"schema": "http://schema.org/",
"PropertyValue": {"@id": "schema:PropertyValue"},
"value": {"@id": "schema:value"},
},
},
},
} }
DATETIME_FORMAT = "%Y-%m-%dT%H:%M:%S.Z" DATETIME_FORMAT = "%Y-%m-%dT%H:%M:%S.Z"
@ -592,8 +604,15 @@ def canonicalise(json_data: dict, include_security: bool = False) -> dict:
""" """
if not isinstance(json_data, dict): if not isinstance(json_data, dict):
raise ValueError("Pass decoded JSON data into LDDocument") raise ValueError("Pass decoded JSON data into LDDocument")
context = [
"https://www.w3.org/ns/activitystreams", context = json_data.get("@context", [])
if not isinstance(context, list):
context = [context]
if not context:
context.append("https://www.w3.org/ns/activitystreams")
context.append(
{ {
"blurhash": "toot:blurhash", "blurhash": "toot:blurhash",
"Emoji": "toot:Emoji", "Emoji": "toot:Emoji",
@ -604,11 +623,12 @@ def canonicalise(json_data: dict, include_security: bool = False) -> dict:
"toot": "http://joinmastodon.org/ns#", "toot": "http://joinmastodon.org/ns#",
"votersCount": "toot:votersCount", "votersCount": "toot:votersCount",
"featured": {"@id": "toot:featured", "@type": "@id"}, "featured": {"@id": "toot:featured", "@type": "@id"},
}, }
] )
if include_security: if include_security:
context.append("https://w3id.org/security/v1") context.append("https://w3id.org/security/v1")
if "@context" not in json_data:
json_data["@context"] = context json_data["@context"] = context
return jsonld.compact(jsonld.expand(json_data), context) return jsonld.compact(jsonld.expand(json_data), context)

View File

@ -2,7 +2,7 @@ import datetime
from dateutil.tz import tzutc from dateutil.tz import tzutc
from core.ld import parse_ld_date from core.ld import canonicalise, parse_ld_date
def test_parse_ld_date(): def test_parse_ld_date():
@ -41,3 +41,68 @@ def test_parse_ld_date():
tzinfo=tzutc(), tzinfo=tzutc(),
) )
assert difference.total_seconds() == 0 assert difference.total_seconds() == 0
def test_canonicalise_single_attachment():
    """
    Canonicalising a document with exactly one schema.org PropertyValue
    attachment should yield compact term names ("PropertyValue", "value").

    The result's "attachment" entry is indexed directly by key here, i.e. the
    test expects a single object rather than a one-element list.
    """
    schema_terms = {
        "schema": "http://schema.org#",
        "PropertyValue": "schema:PropertyValue",
        "value": "schema:value",
    }
    location = {
        "type": "http://schema.org#PropertyValue",
        "name": "Location",
        "http://schema.org#value": "Test Location",
    }
    document = {
        "@context": ["https://www.w3.org/ns/activitystreams", schema_terms],
        "attachment": [location],
    }

    compacted = canonicalise(document)["attachment"]

    # Checked in the same order as the fields appear in the input attachment.
    expected_fields = (
        ("type", "PropertyValue"),
        ("name", "Location"),
        ("value", "Test Location"),
    )
    for field, expected in expected_fields:
        assert compacted[field] == expected
def test_canonicalise_multiple_attachment():
    """
    Canonicalising a document with two schema.org PropertyValue attachments
    should keep both, in order, each using the compact term names
    ("PropertyValue", "value") instead of the full schema.org IRIs.
    """
    schema_terms = {
        "schema": "http://schema.org#",
        "PropertyValue": "schema:PropertyValue",
        "value": "schema:value",
    }
    first = {
        "type": "http://schema.org#PropertyValue",
        "name": "Attachment 1",
        "http://schema.org#value": "Test 1",
    }
    second = {
        "type": "http://schema.org#PropertyValue",
        "name": "Attachment 2",
        "http://schema.org#value": "Test 2",
    }
    document = {
        "@context": ["https://www.w3.org/ns/activitystreams", schema_terms],
        "attachment": [first, second],
    }

    compacted = canonicalise(document)["attachment"]

    # (name, value) pairs expected at each position of the compacted list.
    expected = (
        ("Attachment 1", "Test 1"),
        ("Attachment 2", "Test 2"),
    )
    assert len(compacted) == 2
    for position, (name, value) in enumerate(expected):
        entry = compacted[position]
        assert entry["type"] == "PropertyValue"
        assert entry["name"] == name
        assert entry["value"] == value

View File

@ -854,16 +854,14 @@ class Identity(StatorModel):
self.metadata = [] self.metadata = []
for attachment in get_list(document, "attachment"): for attachment in get_list(document, "attachment"):
if ( if (
attachment["type"] == "http://schema.org#PropertyValue" attachment["type"] == "PropertyValue"
and "name" in attachment and "name" in attachment
and "http://schema.org#value" in attachment and "value" in attachment
): ):
self.metadata.append( self.metadata.append(
{ {
"name": attachment.get("name"), "name": attachment["name"],
"value": FediverseHtmlParser( "value": FediverseHtmlParser(attachment["value"]).html,
attachment.get("http://schema.org#value")
).html,
} }
) )
# Now go do webfinger with that info to see if we can get a canonical domain # Now go do webfinger with that info to see if we can get a canonical domain