takahe/users/models/domain.py

297 lines
9.6 KiB
Python

import json
import logging
import re
import ssl
from functools import cached_property
from typing import Optional
import httpx
import pydantic
import urlman
from django.conf import settings
from django.core.exceptions import ValidationError
from django.db import models
from core.models import Config
from stator.models import State, StateField, StateGraph, StatorModel
from users.schemas import NodeInfo
logger = logging.getLogger(__name__)
class DomainStates(StateGraph):
outdated = State(try_interval=60 * 30, force_initial=True)
updated = State(try_interval=60 * 60 * 24, attempt_immediately=False)
connection_issue = State(externally_progressed=True)
purged = State()
outdated.transitions_to(updated)
updated.transitions_to(outdated)
updated.transitions_to(updated)
outdated.transitions_to(connection_issue)
outdated.transitions_to(purged)
connection_issue.transitions_to(outdated)
connection_issue.transitions_to(purged)
outdated.times_out_to(connection_issue, 60 * 60 * 24)
@classmethod
def handle_outdated(cls, instance: "Domain"):
# Don't talk to servers we've blocked
if instance.blocked:
return cls.updated
# Pull their nodeinfo URI
info = instance.fetch_nodeinfo()
if info:
instance.nodeinfo = info.dict()
instance.save()
return cls.updated
@classmethod
def handle_updated(cls, instance: "Domain"):
if instance.blocked:
return cls.updated
return cls.outdated
def _domain_validator(value: str):
if not Domain.is_valid_domain(value):
raise ValidationError(
"%(value)s is not a valid domain",
params={"value": value},
)
class Domain(StatorModel):
"""
Represents a domain that a user can have an account on.
For protocol reasons, if we want to allow custom usernames
per domain, each "display" domain (the one in the handle) must either let
us serve on it directly, or have a "service" domain that maps
to it uniquely that we can serve on that.
That way, someone coming in with just an Actor URI as their
entrypoint can still try to webfinger preferredUsername@actorDomain
and we can return an appropriate response.
It's possible to just have one domain do both jobs, of course.
This model also represents _other_ servers' domains, which we treat as
display domains for now, until we start doing better probing.
"""
domain = models.CharField(
max_length=250, primary_key=True, validators=[_domain_validator]
)
service_domain = models.CharField(
max_length=250,
null=True,
blank=True,
db_index=True,
unique=True,
)
state = StateField(DomainStates)
# nodeinfo 2.0 detail about the remote server
nodeinfo = models.JSONField(null=True, blank=True)
# If we own this domain
local = models.BooleanField()
# If we have blocked this domain from interacting with us
blocked = models.BooleanField(default=False)
# Domains can be joinable by any user of the instance (as the default one
# should)
public = models.BooleanField(default=False)
# If this is the default domain (shown as the default entry for new users)
default = models.BooleanField(default=False)
# Domains can also be linked to one or more users for their private use
# This should be display domains ONLY
users = models.ManyToManyField("users.User", related_name="domains", blank=True)
# Free-form notes field for admins
notes = models.TextField(blank=True, null=True)
created = models.DateTimeField(auto_now_add=True)
updated = models.DateTimeField(auto_now=True)
class urls(urlman.Urls):
root = "/admin/domains/"
create = "/admin/domains/create/"
edit = "/admin/domains/{self.domain}/"
delete = "{edit}delete/"
root_federation = "/admin/federation/"
edit_federation = "/admin/federation/{self.domain}/"
class Meta:
indexes: list = []
@classmethod
def is_valid_domain(cls, domain: str) -> bool:
"""
Check if a domain is valid, domain must be lowercase
"""
return (
re.match(
r"^(?:[a-z0-9](?:[a-z0-9-_]{0,61}[a-z0-9])?\.)+[a-z0-9][a-z0-9-_]{0,61}[a-z]$",
domain,
)
is not None
)
@classmethod
def get_remote_domain(cls, domain: str) -> "Domain":
return cls.objects.get_or_create(domain=domain.lower(), local=False)[0]
@classmethod
def get_domain(cls, domain: str) -> Optional["Domain"]:
try:
return cls.objects.get(
models.Q(domain=domain.lower())
| models.Q(service_domain=domain.lower())
)
except cls.DoesNotExist:
return None
@property
def uri_domain(self) -> str:
if self.service_domain:
return self.service_domain
return self.domain
@classmethod
def available_for_user(cls, user):
"""
Returns domains that are available for the user to put an identity on
"""
return cls.objects.filter(
models.Q(public=True) | models.Q(users__id=user.id),
local=True,
).order_by("-default", "domain")
def __str__(self):
return self.domain
def save(self, *args, **kwargs):
# Ensure that we are not conflicting with other domains
if Domain.objects.filter(service_domain=self.domain).exists():
raise ValueError(
f"Domain {self.domain} is already a service domain elsewhere!"
)
if self.service_domain:
if Domain.objects.filter(domain=self.service_domain).exists():
raise ValueError(
f"Service domain {self.service_domain} is already a domain elsewhere!"
)
super().save(*args, **kwargs)
def fetch_nodeinfo(self) -> NodeInfo | None:
"""
Fetch the /NodeInfo/2.0 for the domain
"""
nodeinfo20_url = f"https://{self.domain}/nodeinfo/2.0"
with httpx.Client(
timeout=settings.SETUP.REMOTE_TIMEOUT,
headers={"User-Agent": settings.TAKAHE_USER_AGENT},
) as client:
try:
response = client.get(
f"https://{self.domain}/.well-known/nodeinfo",
follow_redirects=True,
headers={"Accept": "application/json"},
)
except httpx.HTTPError:
pass
except (ssl.SSLCertVerificationError, ssl.SSLError):
return None
else:
try:
for link in response.json().get("links", []):
if (
link.get("rel")
== "http://nodeinfo.diaspora.software/ns/schema/2.0"
):
nodeinfo20_url = link.get("href", nodeinfo20_url)
break
except json.JSONDecodeError:
pass
try:
response = client.get(
nodeinfo20_url,
follow_redirects=True,
headers={"Accept": "application/json"},
)
response.raise_for_status()
except (httpx.HTTPError, ssl.SSLCertVerificationError) as ex:
response = getattr(ex, "response", None)
if (
response
and response.status_code < 500
and response.status_code not in [401, 403, 404, 406, 410]
):
logger.warning(
"Client error fetching nodeinfo: %d %s %s",
response.status_code,
nodeinfo20_url,
ex,
extra={
"content": response.content,
"domain": self.domain,
},
)
return None
try:
info = NodeInfo(**response.json())
except (json.JSONDecodeError, pydantic.ValidationError) as ex:
logger.warning(
"Client error decoding nodeinfo: %s %s",
nodeinfo20_url,
ex,
extra={
"domain": self.domain,
},
)
return None
return info
@property
def software(self):
if self.nodeinfo:
software = self.nodeinfo.get("software", {})
name = software.get("name", "unknown")
version = software.get("version", "unknown")
return f"{name:.10} - {version:.10}"
return None
def recursively_blocked(self) -> bool:
"""
Checks for blocks on all right subsets of this domain, except the very
last part of the TLD.
Yes, I know this weirdly lets you block ".co.uk" or whatever, but
people can do that if they want I guess.
"""
# Efficient short-circuit
if self.blocked:
return True
# Build domain list
domain_parts = [self.domain]
while "." in domain_parts[-1]:
domain_parts.append(domain_parts[-1].split(".", 1)[1])
# See if any of those are blocked
return Domain.objects.filter(domain__in=domain_parts, blocked=True).exists()
### Config ###
@cached_property
def config_domain(self) -> Config.DomainOptions:
return Config.load_domain(self)