Templatize robots.txt (#478)
parent aeba38b8ae · commit 93c0af992b
@@ -2,6 +2,7 @@ import json
 from typing import ClassVar

 import markdown_it
+from django.conf import settings
 from django.http import HttpResponse
 from django.shortcuts import redirect
 from django.templatetags.static import static
@@ -69,6 +70,23 @@ class StaticContentView(View):
         raise NotImplementedError()


+@method_decorator(cache_page(60 * 60), name="dispatch")
+class RobotsTxt(TemplateView):
+    """
+    Serves the robots.txt for Takahē
+
+    To specify additional user-agents to disallow, use TAKAHE_ROBOTS_TXT_DISALLOWED_USER_AGENTS
+    """
+
+    template_name = "robots.txt"
+    content_type = "text/plain"
+
+    def get_context_data(self):
+        return {
+            "user_agents": getattr(settings, "ROBOTS_TXT_DISALLOWED_USER_AGENTS", []),
+        }
+
+
 @method_decorator(cache_control(max_age=60 * 15), name="dispatch")
 class AppManifest(StaticContentView):
     """
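Because RobotsTxt is an ordinary TemplateView wired to a URL (see the urlpatterns hunk below), it can be exercised end to end with Django's test client. A minimal sketch, assuming a standard Django test setup; the test name and the sample agent are illustrative, not part of the commit. Note that cache_page(60 * 60) means repeated requests within one run may be served from the cache:

from django.test import Client, override_settings


@override_settings(ROBOTS_TXT_DISALLOWED_USER_AGENTS=["BadBot"])
def test_robots_txt_lists_disallowed_agents():
    # The view renders templates/robots.txt with content_type="text/plain".
    response = Client().get("/robots.txt")
    assert response.status_code == 200
    assert response["Content-Type"].startswith("text/plain")
    body = response.content.decode()
    assert "Disallow: /tags/" in body  # static rule from the template
    assert "User-agent: BadBot" in body  # injected via the settings list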
@@ -29,11 +29,6 @@ server {
     proxy_hide_header X-Takahe-User;
     proxy_hide_header X-Takahe-Identity;

-    # Serve robots.txt from the non-collected dir as a special case.
-    location /robots.txt {
-        alias /takahe/static/robots.txt;
-    }
-
     # Serves static files from the collected dir
     location /static/ {
         # Files in static have cache-busting hashes in the name, thus can be cached forever
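With the alias gone, /robots.txt is no longer special-cased by nginx: the request falls through to the Django application, where the new RobotsTxt view renders the templated file, and cache_page(60 * 60) keeps the per-request cost low.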
@@ -105,6 +105,10 @@ class Settings(BaseSettings):
     AUTO_ADMIN_EMAIL: EmailStr | None = None
     ERROR_EMAILS: list[EmailStr] | None = None

+    #: If set, a list of user agents to completely disallow in robots.txt
+    #: List formatting must be a valid JSON list, such as `["Agent1", "Agent2"]`
+    ROBOTS_TXT_DISALLOWED_USER_AGENTS: list[str] = Field(default_factory=list)
+
     MEDIA_URL: str = "/media/"
     MEDIA_ROOT: str = str(BASE_DIR / "media")
     MEDIA_BACKEND: MediaBackendUrl | None = None
@@ -313,6 +317,8 @@ STATOR_TOKEN = SETUP.STATOR_TOKEN
 STATOR_CONCURRENCY = SETUP.STATOR_CONCURRENCY
 STATOR_CONCURRENCY_PER_MODEL = SETUP.STATOR_CONCURRENCY_PER_MODEL

+ROBOTS_TXT_DISALLOWED_USER_AGENTS = SETUP.ROBOTS_TXT_DISALLOWED_USER_AGENTS
+
 CORS_ORIGIN_ALLOW_ALL = True # Temporary
 CORS_ORIGIN_WHITELIST = SETUP.CORS_HOSTS
 CORS_ALLOW_CREDENTIALS = True
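The "valid JSON list" requirement comes from how pydantic BaseSettings decodes non-scalar field types from environment variables. A standalone sketch of that behavior, assuming pydantic v1-style BaseSettings and a TAKAHE_ env prefix (inferred from the TAKAHE_ROBOTS_TXT_DISALLOWED_USER_AGENTS name in the view's docstring); this mirrors, rather than reproduces, Takahē's Settings class:

import os

from pydantic import BaseSettings, Field


class Settings(BaseSettings):
    ROBOTS_TXT_DISALLOWED_USER_AGENTS: list[str] = Field(default_factory=list)

    class Config:
        env_prefix = "TAKAHE_"  # assumed prefix, inferred from the docstring


# Non-scalar fields are parsed as JSON, hence the comment in the diff above.
os.environ["TAKAHE_ROBOTS_TXT_DISALLOWED_USER_AGENTS"] = '["Agent1", "Agent2"]'
assert Settings().ROBOTS_TXT_DISALLOWED_USER_AGENTS == ["Agent1", "Agent2"]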
@@ -19,6 +19,7 @@ from users.views import (

 urlpatterns = [
     path("", core.homepage),
+    path("robots.txt", core.RobotsTxt.as_view()),
     path("manifest.json", core.AppManifest.as_view()),
     # Activity views
     path("notifications/", timelines.Notifications.as_view(), name="notifications"),
@@ -0,0 +1,13 @@
+User-Agent: *
+
+# Don't allow any bot to crawl tags.
+Disallow: /tags/
+Disallow: /tags/*
+
+# Don't allow bots to crawl through the proxy
+Disallow: /proxy/*
+
+{% for user_agent in user_agents %}
+User-agent: {{user_agent}}
+Disallow: /
+{% endfor %}
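To see what the loop emits, the per-agent block can be rendered standalone with Django's low-level template Engine, which needs no project settings; the agent names are illustrative:

from django.template import Context, Engine

# The per-agent block from the template above, inlined for a standalone demo.
source = (
    "{% for user_agent in user_agents %}\n"
    "User-agent: {{user_agent}}\n"
    "Disallow: /\n"
    "{% endfor %}"
)
output = Engine().from_string(source).render(
    Context({"user_agents": ["Agent1", "Agent2"]})
)
print(output)
# Each iteration begins with the newline that follows the {% for %} tag, so
# the rendered blocks come out separated by blank lines:
#
# User-agent: Agent1
# Disallow: /
#
# User-agent: Agent2
# Disallow: /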