Nginx now bundled in image, does media caching
Also serves static files. Old media caching removed.
This commit is contained in:
parent
3de188e406
commit
a26263fb05
|
@ -9,6 +9,8 @@ RUN apt-get update \
|
|||
&& apt-get install -y --no-install-recommends \
|
||||
libpq5 \
|
||||
libxslt1.1 \
|
||||
nginx \
|
||||
busybox \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
COPY requirements.txt requirements.txt
|
||||
|
@ -30,6 +32,8 @@ RUN apt-get update \
|
|||
zlib1g-dev \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
RUN mkdir -p /cache
|
||||
|
||||
COPY . /takahe
|
||||
|
||||
WORKDIR /takahe
|
||||
|
@ -41,4 +45,4 @@ EXPOSE 8000
|
|||
# Set some sensible defaults
|
||||
ENV GUNICORN_CMD_ARGS="--workers 8"
|
||||
|
||||
CMD ["gunicorn", "takahe.wsgi:application", "-b", "0.0.0.0:8000"]
|
||||
CMD ["bash", "docker/run.sh"]
|
||||
|
|
|
@ -0,0 +1,63 @@
|
|||
# Nginx front-end bundled into the Takahe container image.
#
# Listens on port 8000, serves collected static files straight from disk,
# performs read-through disk caching for /media/ and /proxy/ URLs, and
# forwards everything else to gunicorn on 127.0.0.1:8001.

# Stay in the foreground so the container entrypoint can supervise the process.
daemon off;
error_log /dev/stdout info;

events {
    worker_connections 4096;
}

http {

    include /etc/nginx/mime.types;

    # __CACHESIZE__ is a placeholder substituted by docker/run.sh at startup
    # (from TAKAHE_NGINX_CACHE_SIZE, defaulting to 1g).
    proxy_cache_path /cache/nginx levels=1:2 keys_zone=takahe:20m inactive=14d max_size=__CACHESIZE__;

    # The gunicorn application server started alongside nginx by docker/run.sh.
    upstream takahe {
        server "127.0.0.1:8001";
    }

    server {
        listen 8000;
        listen [::]:8000;
        server_name _;

        root /takahe/static;
        index index.html;

        ignore_invalid_headers on;
        proxy_connect_timeout 900;

        proxy_headers_hash_max_size 1024;
        proxy_headers_hash_bucket_size 128;

        client_max_body_size 512M;
        client_body_buffer_size 128k;
        charset utf-8;

        proxy_set_header Host $http_host;
        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
        proxy_http_version 1.1;
        proxy_cache takahe;

        # Static assets are served directly from the collected-static directory.
        location /static/ {
            alias /takahe/static-collected/;
        }

        # Cached media and remote-content proxy URLs. The trailing slash in the
        # pattern keeps unrelated paths that merely start with "media" or
        # "proxy" (e.g. /mediafoo) from inheriting these long cache lifetimes.
        location ~* ^/(media|proxy)/ {
            # Key on host + path only, so query strings share one cache entry.
            proxy_cache_key $host$uri;
            proxy_cache_valid 200 304 720h;
            proxy_cache_valid 301 307 12h;
            # Never cache upstream failures.
            proxy_cache_valid 500 502 503 504 0s;
            proxy_cache_valid any 72h;

            # Expose HIT/MISS/etc. to clients to make cache debugging easier.
            add_header X-Cache $upstream_cache_status;

            proxy_pass http://takahe;
        }

        # Everything else goes to the application unbuffered.
        location / {
            proxy_redirect off;
            proxy_buffering off;
            proxy_pass http://takahe;
        }
    }
}
|
|
@ -0,0 +1,15 @@
|
|||
#!/bin/bash

# Container entrypoint: substitutes the configured cache size into the nginx
# config, then runs nginx and gunicorn side by side and exits as soon as
# either of them exits, propagating that process's exit status.

# Set up cache size (TAKAHE_NGINX_CACHE_SIZE, defaulting to 1g).
CACHE_SIZE="${TAKAHE_NGINX_CACHE_SIZE:-1g}"
# The expression is quoted so the value is not word-split or glob-expanded
# by the shell before sed sees it.
sed -i "s/__CACHESIZE__/${CACHE_SIZE}/g" /takahe/docker/nginx.conf

# Run nginx and gunicorn
nginx -c "/takahe/docker/nginx.conf" &
gunicorn takahe.wsgi:application -b 0.0.0.0:8001 &

# Wait for any process to exit
wait -n

# Exit with status of process that exited first
exit $?
|
|
@ -45,6 +45,10 @@ You'll need to run two copies of our `Docker image <https://hub.docker.com/r/joi
|
|||
|
||||
* One with the arguments ``python3 manage.py runstator``, which will run the background worker
|
||||
|
||||
These containers will need the ability to write at least 1GB of files out
|
||||
to their scratch disks. See the ``TAKAHE_NGINX_CACHE_SIZE`` environment
|
||||
variable for more.
|
||||
|
||||
.. note::
|
||||
|
||||
If you cannot run a background worker for some reason, you can instead
|
||||
|
@ -59,7 +63,9 @@ project, so if you know what you're doing, go for it - but we won't be able
|
|||
to give you support.
|
||||
|
||||
If you are running on Kubernetes, we recommend that you make one Deployment
|
||||
for the webserver and one Deployment for the background worker.
|
||||
for the webserver and one Deployment for the background worker. We also
|
||||
recommend that you mount an ``emptyDir`` to the ``/cache/`` path on the
|
||||
webserver containers, as this is where the media cache will be stored.
|
||||
|
||||
|
||||
Environment Variables
|
||||
|
@ -113,6 +119,12 @@ be provided to the containers from the first boot.
|
|||
``["andrew@aeracode.org"]`` (if you're doing this via shell, be careful
|
||||
about escaping!)
|
||||
|
||||
In addition, there are some optional variables you can set:
|
||||
|
||||
* ``TAKAHE_NGINX_CACHE_SIZE`` allows you to specify the size of the disk cache
|
||||
that is used to cache proxied avatars, profile images and media. See
|
||||
:doc:`tuning` for more.
|
||||
|
||||
|
||||
.. _media_configuration:
|
||||
|
||||
|
|
120
docs/tuning.rst
120
docs/tuning.rst
|
@ -9,26 +9,6 @@ We recommend that all installations are run behind a CDN, and
|
|||
have caches configured. See below for more details on each.
|
||||
|
||||
|
||||
CDNs
|
||||
----
|
||||
|
||||
Takahē is *designed to be run behind a CDN*. It serves most static files directly
|
||||
from its main webservers, which is inefficient if called directly, but they
|
||||
have ``Cache-Control`` headers set so that the CDN can do the heavy lifting -
|
||||
more efficiently than offloading all files to something like S3.
|
||||
|
||||
If you don't run behind a CDN, things will still work, but even a medium
|
||||
level of traffic might put the webservers under a lot of load.
|
||||
|
||||
If you do run behind a CDN, ensure that your CDN is set to respect
|
||||
``Cache-Control`` headers from the origin. Some CDNs go purely off of file
|
||||
extensions by default, which will not capture all of the proxy views Takahē
|
||||
uses to show remote images without leaking user information.
|
||||
|
||||
If you don't want to use a CDN but still want a performance improvement, a
|
||||
read-through cache that respects ``Cache-Control``, like Varnish, will
|
||||
also help if placed in front of Takahē.
|
||||
|
||||
|
||||
Scaling
|
||||
-------
|
||||
|
@ -88,11 +68,7 @@ servers may consider it permanently unreachable and stop sending posts.
|
|||
Caching
|
||||
-------
|
||||
|
||||
By default Takahē has caching disabled. The caching needs of a server can
|
||||
vary drastically based upon the number of users and how interconnected
|
||||
they are with other servers.
|
||||
|
||||
There are multiple ways Takahē uses caches:
|
||||
There are two ways Takahē uses caches:
|
||||
|
||||
* For caching rendered pages and responses, like user profile information.
|
||||
These caches reduce database load on your server and improve performance.
|
||||
|
@ -101,32 +77,26 @@ There are multiple ways Takahē uses caches:
|
|||
proxied to protect your users' privacy; also caching these reduces
|
||||
your server's consumed bandwidth and improves users' loading times.
|
||||
|
||||
The exact caches you can configure are:
|
||||
By default Takahē has Nginx inside its container image configured to perform
|
||||
read-through HTTP caching for the image and media files, and no cache
|
||||
configured for page rendering.
|
||||
|
||||
* ``TAKAHE_CACHES_DEFAULT``: Rendered page and response caching
|
||||
Each cache can be adjusted to your needs; let's talk about both.
|
||||
|
||||
* ``TAKAHE_CACHES_MEDIA``: Remote post images and user profile header pictures
|
||||
|
||||
* ``TAKAHE_CACHES_AVATARS``: Remote user avatars ("icons") only
|
||||
Page Caching
|
||||
~~~~~~~~~~~~
|
||||
|
||||
We recommend you set up ``TAKAHE_CACHES_MEDIA`` and ``TAKAHE_CACHES_AVATARS``
|
||||
at a bare minimum - proxying these all the time without caching will eat into
|
||||
your server's bandwidth.
|
||||
This caching helps Takahē avoid database hits by rendering complex pages or
|
||||
API endpoints only once, and turning it on will reduce your database load.
|
||||
There is no cache enabled for this by default.
|
||||
|
||||
All caches are configured the same way - with a custom cache URI/URL. We
|
||||
support anything that is available as part of
|
||||
To configure it, set the ``TAKAHE_CACHES_DEFAULT`` environment variable.
|
||||
We support anything that is available as part of
|
||||
`django-cache-url <https://github.com/epicserve/django-cache-url>`_, but
|
||||
some cache backends will require additional Python packages not installed
|
||||
by default with Takahē. More discussion on backend is below.
|
||||
by default with Takahē. More discussion on some major backends is below.
|
||||
|
||||
All items in the cache come with an expiry set - usually one week - but you
|
||||
can also configure a maximum cache size on dedicated cache datastores like
|
||||
Memcache. The key names used by the caches do not overlap, so there is
|
||||
no need to configure different key prefixes for each of Takahē's caches.
|
||||
|
||||
|
||||
Backends
|
||||
~~~~~~~~
|
||||
|
||||
Redis
|
||||
#####
|
||||
|
@ -140,11 +110,6 @@ Examples::
|
|||
A Redis-protocol server. Use ``redis://`` for unencrypted communication and
|
||||
``rediss://`` for TLS.
|
||||
|
||||
Redis has a large item size limit and is suitable for all caches. We recommend
|
||||
that you keep the DEFAULT cache separate from the MEDIA and AVATARS caches, and
|
||||
set the ``maxmemory`` on both to appropriate values (the proxying caches will
|
||||
need more memory than the DEFAULT cache).
|
||||
|
||||
|
||||
|
||||
Memcache
|
||||
|
@ -157,9 +122,6 @@ Examples::
|
|||
|
||||
A remote Memcache-protocol server (or set of servers).
|
||||
|
||||
Memcached has a 1MB limit per key by default, so this is only suitable for the
|
||||
DEFAULT cache and not the AVATARS or MEDIA cache.
|
||||
|
||||
|
||||
Filesystem
|
||||
##########
|
||||
|
@ -168,10 +130,8 @@ Examples::
|
|||
|
||||
file:///var/cache/takahe/
|
||||
|
||||
A cache on the local disk.
|
||||
|
||||
This *will* work with any of the cache backends, but is probably more suitable
|
||||
for MEDIA and AVATARS.
|
||||
A cache on the local disk. Slower than other options, and only really useful
|
||||
if you have no other choice.
|
||||
|
||||
Note that if you are running Takahē in a cluster, this cache will not be shared
|
||||
across different machines. This is not quite as bad as it first seems; it just
|
||||
|
@ -187,4 +147,52 @@ Examples::
|
|||
locmem://default
|
||||
|
||||
A local memory cache, inside the Python process. This will consume additional
|
||||
memory for the process, and should not be used with the MEDIA or AVATARS caches.
|
||||
memory for the process, and should be used with care.
|
||||
|
||||
|
||||
Image and Media Caching
|
||||
~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
In order to protect your users' privacy and IP addresses, we can't just send
|
||||
them the remote URLs of user avatars and post images that aren't on your
|
||||
server; we instead need to proxy them through Takahē in order to obscure who
|
||||
is requesting them.
|
||||
|
||||
Some other ActivityPub servers do this by downloading all media and images as
|
||||
soon as they see it, and storing it all locally with some sort of clean-up job;
|
||||
Takahē instead opts for using a read-through cache for this task, which uses
|
||||
a bit more bandwidth in the long run but which has much easier maintenance and
|
||||
better failure modes.
|
||||
|
||||
Our Docker image comes with this cache built in, as without it you'll be making
|
||||
Python do a lot of file proxying on every page load (and it's not the best at
|
||||
that). It's set to 1GB of disk on each container by default, but you can adjust
|
||||
this by setting the ``TAKAHE_NGINX_CACHE_SIZE`` environment variable to a value
|
||||
Nginx understands, like ``10g``.
|
||||
|
||||
The cache directory is ``/cache/``, and you can mount a different disk into
|
||||
this path if you'd like to give it faster or more ephemeral storage.
|
||||
|
||||
If you have an external CDN or cache, you can also opt to add your own caching
|
||||
to these URLs; they all begin with ``/proxy/``, and have appropriate
|
||||
``Cache-Control`` headers set.
|
||||
|
||||
|
||||
CDNs
|
||||
----
|
||||
|
||||
Takahē can be run behind a CDN if you want to offset some of the load from the
|
||||
webserver containers. Takahē has to proxy all remote user avatars and images in
|
||||
order to protect the privacy of your users, and has a built-in cache to help
|
||||
with this (see "Caching" above), but at large scale this might start to get
|
||||
strained.
|
||||
|
||||
If you do run behind a CDN, ensure that your CDN is set to respect
|
||||
``Cache-Control`` headers from the origin. Some CDNs go purely off of file
|
||||
extensions by default, which will not capture all of the proxy views Takahē
|
||||
uses to show remote images without leaking user information.
|
||||
|
||||
If you don't want to use a CDN but still want a performance improvement, a
|
||||
read-through cache that respects ``Cache-Control``, like Varnish, will
|
||||
also help if placed in front of Takahē.
|
||||
|
|
|
@ -1,6 +1,5 @@
|
|||
import httpx
|
||||
from django.conf import settings
|
||||
from django.core.cache import caches
|
||||
from django.http import Http404, HttpResponse
|
||||
from django.shortcuts import get_object_or_404
|
||||
from django.views.generic import View
|
||||
|
@ -9,46 +8,31 @@ from activities.models import Emoji, PostAttachment
|
|||
from users.models import Identity
|
||||
|
||||
|
||||
class BaseCacheView(View):
|
||||
class BaseProxyView(View):
|
||||
"""
|
||||
Base class for caching remote content.
|
||||
Base class for proxying remote content.
|
||||
"""
|
||||
|
||||
cache_name = "media"
|
||||
item_timeout: int | None = None
|
||||
|
||||
def get(self, request, **kwargs):
|
||||
self.kwargs = kwargs
|
||||
remote_url = self.get_remote_url()
|
||||
cache = caches[self.cache_name]
|
||||
cache_key = "proxy_" + remote_url
|
||||
# See if it's already cached
|
||||
cached_content = cache.get(cache_key)
|
||||
if not cached_content:
|
||||
# OK, fetch and cache it
|
||||
try:
|
||||
remote_response = httpx.get(
|
||||
remote_url,
|
||||
headers={"User-Agent": settings.TAKAHE_USER_AGENT},
|
||||
follow_redirects=True,
|
||||
timeout=settings.SETUP.REMOTE_TIMEOUT,
|
||||
)
|
||||
except httpx.RequestError:
|
||||
return HttpResponse(status=502)
|
||||
if remote_response.status_code >= 400:
|
||||
return HttpResponse(status=502)
|
||||
# We got it - shove it into the cache
|
||||
cached_content = {
|
||||
"content": remote_response.content,
|
||||
"mimetype": remote_response.headers.get(
|
||||
try:
|
||||
remote_response = httpx.get(
|
||||
remote_url,
|
||||
headers={"User-Agent": settings.TAKAHE_USER_AGENT},
|
||||
follow_redirects=True,
|
||||
timeout=settings.SETUP.REMOTE_TIMEOUT,
|
||||
)
|
||||
except httpx.RequestError:
|
||||
return HttpResponse(status=502)
|
||||
if remote_response.status_code >= 400:
|
||||
return HttpResponse(status=502)
|
||||
return HttpResponse(
|
||||
remote_response.content,
|
||||
headers={
|
||||
"Content-Type": remote_response.headers.get(
|
||||
"Content-Type", "application/octet-stream"
|
||||
),
|
||||
}
|
||||
cache.set(cache_key, cached_content, timeout=self.item_timeout)
|
||||
return HttpResponse(
|
||||
cached_content["content"],
|
||||
headers={
|
||||
"Content-Type": cached_content["mimetype"],
|
||||
"Cache-Control": "public, max-age=3600",
|
||||
},
|
||||
)
|
||||
|
@ -57,13 +41,11 @@ class BaseCacheView(View):
|
|||
raise NotImplementedError()
|
||||
|
||||
|
||||
class EmojiCacheView(BaseCacheView):
|
||||
class EmojiCacheView(BaseProxyView):
|
||||
"""
|
||||
Caches Emoji
|
||||
Proxies Emoji
|
||||
"""
|
||||
|
||||
item_timeout = 86400 * 7 # One week
|
||||
|
||||
def get_remote_url(self):
|
||||
self.emoji = get_object_or_404(Emoji, pk=self.kwargs["emoji_id"])
|
||||
|
||||
|
@ -72,14 +54,11 @@ class EmojiCacheView(BaseCacheView):
|
|||
return self.emoji.remote_url
|
||||
|
||||
|
||||
class IdentityIconCacheView(BaseCacheView):
|
||||
class IdentityIconCacheView(BaseProxyView):
|
||||
"""
|
||||
Caches identity icons (avatars)
|
||||
Proxies identity icons (avatars)
|
||||
"""
|
||||
|
||||
cache_name = "avatars"
|
||||
item_timeout = 86400 * 7 # One week
|
||||
|
||||
def get_remote_url(self):
|
||||
self.identity = get_object_or_404(Identity, pk=self.kwargs["identity_id"])
|
||||
if self.identity.local or not self.identity.icon_uri:
|
||||
|
@ -87,13 +66,11 @@ class IdentityIconCacheView(BaseCacheView):
|
|||
return self.identity.icon_uri
|
||||
|
||||
|
||||
class IdentityImageCacheView(BaseCacheView):
|
||||
class IdentityImageCacheView(BaseProxyView):
|
||||
"""
|
||||
Caches identity profile header images
|
||||
Proxies identity profile header images
|
||||
"""
|
||||
|
||||
item_timeout = 86400 * 7 # One week
|
||||
|
||||
def get_remote_url(self):
|
||||
self.identity = get_object_or_404(Identity, pk=self.kwargs["identity_id"])
|
||||
if self.identity.local or not self.identity.image_uri:
|
||||
|
@ -101,13 +78,11 @@ class IdentityImageCacheView(BaseCacheView):
|
|||
return self.identity.image_uri
|
||||
|
||||
|
||||
class PostAttachmentCacheView(BaseCacheView):
|
||||
class PostAttachmentCacheView(BaseProxyView):
|
||||
"""
|
||||
Caches post media (images only, videos should always be offloaded to remote)
|
||||
Proxies post media (images only, videos should always be offloaded to remote)
|
||||
"""
|
||||
|
||||
item_timeout = 86400 * 7 # One week
|
||||
|
||||
def get_remote_url(self):
|
||||
self.post_attachment = get_object_or_404(
|
||||
PostAttachment, pk=self.kwargs["attachment_id"]
|
||||
|
|
|
@ -127,12 +127,6 @@ class Settings(BaseSettings):
|
|||
#: Default cache backend
|
||||
CACHES_DEFAULT: CacheBackendUrl | None = None
|
||||
|
||||
#: User icon (avatar) caching backend
|
||||
CACHES_AVATARS: CacheBackendUrl | None = None
|
||||
|
||||
#: Media caching backend
|
||||
CACHES_MEDIA: CacheBackendUrl | None = None
|
||||
|
||||
PGHOST: str | None = None
|
||||
PGPORT: int | None = 5432
|
||||
PGNAME: str = "takahe"
|
||||
|
@ -385,8 +379,6 @@ if SETUP.MEDIA_BACKEND:
|
|||
|
||||
CACHES = {
|
||||
"default": django_cache_url.parse(SETUP.CACHES_DEFAULT or "dummy://"),
|
||||
"avatars": django_cache_url.parse(SETUP.CACHES_AVATARS or "dummy://"),
|
||||
"media": django_cache_url.parse(SETUP.CACHES_MEDIA or "dummy://"),
|
||||
}
|
||||
|
||||
if SETUP.ERROR_EMAILS:
|
||||
|
|
Loading…
Reference in New Issue