initial commit
This commit is contained in:
commit
1d8b4fd8c9
|
@ -0,0 +1,3 @@
|
|||
*.toml
|
||||
!*.example.toml
|
||||
*.timestamp
|
|
@ -0,0 +1,4 @@
|
|||
site = "https://botsin.space"
|
||||
access_token = ""
|
||||
timestamp_path = "last_post.timestamp"
|
||||
account = "example_1@example.com"
|
|
@ -0,0 +1,174 @@
|
|||
#!/usr/bin/env python
|
||||
# SPDX-License-Identifier: AGPL-3.0-only
|
||||
|
||||
import io
|
||||
import sys
|
||||
import anyio
|
||||
import aiohttp
|
||||
import pleroma
|
||||
import argparse
|
||||
import platform
|
||||
import pendulum
|
||||
import aiosqlite
|
||||
import contextlib
|
||||
import qtoml as toml
|
||||
from utils import suppress
|
||||
from pleroma import Pleroma
|
||||
from functools import partial
|
||||
|
||||
# User-Agent sent on all outgoing HTTP requests: identifies the bot,
# the aiohttp version in use, and the Python implementation/version.
USER_AGENT = (
	'mirror-bot; '
	f'{aiohttp.__version__}; '
	f'{platform.python_implementation()}/{platform.python_version()}'
)

UTC = pendulum.timezone('UTC')
# content types accepted when fetching remote actor/outbox documents
JSON_CONTENT_TYPE = 'application/json'
ACTIVITYPUB_CONTENT_TYPE = 'application/activity+json'

# schema version for the local database migrations
# NOTE(review): no migration code is visible in this chunk — presumably used elsewhere
MIGRATION_VERSION = 1
|
||||
|
||||
class PostMirror:
	"""Mirrors the newest post (and its attachments) from one fediverse account
	to the account configured for this bot.

	Expects ``config`` to provide: ``site``, ``access_token``, ``account``
	and ``timestamp_path``.
	"""

	def __init__(self, *, config):
		self.config = config

	async def __aenter__(self):
		# Use an AsyncExitStack so both clients are torn down together in __aexit__.
		stack = contextlib.AsyncExitStack()
		self._fedi = await stack.enter_async_context(
			Pleroma(api_base_url=self.config['site'], access_token=self.config['access_token']),
		)
		self._http = await stack.enter_async_context(
			aiohttp.ClientSession(
				headers={
					'User-Agent': USER_AGENT,
					'Accept': ', '.join([JSON_CONTENT_TYPE, ACTIVITYPUB_CONTENT_TYPE]),
				},
				trust_env=True,
				raise_for_status=True,
			),
		)
		self._ctx_stack = stack
		return self

	async def __aexit__(self, *excinfo):
		return await self._ctx_stack.__aexit__(*excinfo)

	async def mirror_posts(self):
		"""Mirror the most recent post of the configured account if it is newer
		than the timestamp recorded from the previous run.
		"""
		outbox = await self.fetch_outbox(self.config['account'])
		async with self._http.get(outbox['first']) as resp:
			page = await resp.json()
		last_post = page['orderedItems'][0]['object']

		try:
			with open(self.config['timestamp_path']) as f:
				last_mirrored_ts = pendulum.from_timestamp(float(f.read()))
		except FileNotFoundError:
			# nothing mirrored yet: treat every post as new
			last_mirrored_ts = pendulum.from_timestamp(0.0)

		last_post_ts = pendulum.parse(last_post['published'])

		if last_post_ts < last_mirrored_ts:
			print('Nothing to do')
			return

		# mirror the post and all its attachments; attachments are fetched
		# concurrently and written into their slot by index to keep ordering
		attachments = [None] * len(last_post['attachment'])
		async with anyio.create_task_group() as tg:
			for i, attachment in enumerate(last_post['attachment']):
				tg.start_soon(self._mirror_attachment, i, attachments, attachment)
		assert None not in attachments
		await self._fedi.post(
			last_post['source'],
			cw=last_post['summary'],
			visibility='unlisted',
			media_ids=attachments,
		)

		with open(self.config['timestamp_path'], 'w') as f:
			f.write(str(pendulum.now('UTC').timestamp()))

	async def _mirror_attachment(self, i, out_attachments, attachment):
		"""Download one attachment and re-upload it, storing the new media ID
		at ``out_attachments[i]``.
		"""
		async with self._http.get(attachment['url']) as resp:
			data = await resp.read()
		out_attachments[i] = (await self._fedi.post_media(
			io.BytesIO(data),
			attachment['mediaType'],
			filename=attachment['name'],
			# TODO support descriptions
		))['id']

	async def fetch_outbox(self, handle):
		"""
		finger handle, a fully-qualified ActivityPub actor name,
		returning their outbox info
		"""
		print('Fingering ', handle, '...', sep='')

		username, at, instance = handle.lstrip('@').partition('@')
		assert at == '@'

		# WebFinger always lives at the same well-known path, and some servers
		# (e.g. honk) don't support /.well-known/host-meta discovery anyway,
		# so skip host-meta entirely.
		profile_url = await self._finger_actor(username, instance)

		try:
			async with self._http.get(profile_url) as resp:
				profile = await resp.json()
		except aiohttp.ContentTypeError:
			# we didn't get JSON, so just guess the outbox URL
			outbox_url = profile_url + '/outbox'
		else:
			outbox_url = profile['outbox']

		async with self._http.get(outbox_url) as resp:
			outbox = await resp.json()
		assert outbox['type'] == 'OrderedCollection'
		return outbox

	async def _finger_actor(self, username, instance):
		"""Resolve ``username@instance`` to a profile URL via WebFinger."""
		# deliberately plain HTTP (a violation of the WebFinger spec) so that
		# e.g. Tor instances without HTTPS-over-onion still work
		finger_url = f'http://{instance}/.well-known/webfinger?resource=acct:{username}@{instance}'
		async with self._http.get(finger_url) as resp:
			finger_result = await resp.json()
		return self._parse_webfinger_result(username, instance, finger_result)

	def _parse_webfinger_result(self, username, instance, finger_result):
		"""given webfinger data, return profile URL for handle"""
		def check_content_type(type, ct):
			return ct == type or ct.startswith(type+';')
		check_ap = partial(check_content_type, ACTIVITYPUB_CONTENT_TYPE)

		try:
			# the server might return multiple "self" links; take the first
			# ActivityPub one. We deliberately don't require the link host to
			# match `instance`: some instances serve the canonical URL from a
			# different (sub)domain than the handle's.
			return next(
				link['href']
				for link in finger_result['links']
				if link['rel'] == 'self' and check_ap(link['type'])
			)
		except StopIteration:
			# suppress the StopIteration context: it adds nothing to the error
			raise RuntimeError(
				f'fatal: while fingering {username}@{instance}, failed to find a profile URL'
			) from None
|
||||
|
||||
async def amain():
	"""Async entry point: parse CLI arguments, load the TOML config, and
	perform a single mirror pass.
	"""
	arg_parser = argparse.ArgumentParser(description='Mirror posts from another fediverse account')
	arg_parser.add_argument(
		'-c', '--cfg',
		dest='cfg',
		nargs='?',
		default='config.toml',
		help='Specify a custom location for the config file.',
	)
	parsed = arg_parser.parse_args()

	with open(parsed.cfg) as config_file:
		config = toml.load(config_file)

	async with PostMirror(config=config) as mirror:
		await mirror.mirror_posts()
|
||||
|
||||
def main():
	"""Synchronous wrapper around amain(); Ctrl-C becomes exit status 1."""
	try:
		anyio.run(amain)
	except KeyboardInterrupt:
		sys.exit(1)
|
||||
|
||||
# standard entry-point guard: run only when executed as a script,
# not when imported as a module
if __name__ == '__main__':
	main()
|
|
@ -0,0 +1,183 @@
|
|||
# SPDX-License-Identifier: AGPL-3.0-only
|
||||
|
||||
import sys
|
||||
import yarl
|
||||
import json
|
||||
import hashlib
|
||||
import aiohttp
|
||||
from multidict import MultiDict
|
||||
from http import HTTPStatus
|
||||
|
||||
def http_session_factory(headers=None):
	"""Build an aiohttp ClientSession with a descriptive User-Agent.

	headers: optional extra default headers; merged over the User-Agent header.
	"""
	# avoid a mutable default argument; None means "no extra headers"
	headers = headers or {}
	py_version = '.'.join(map(str, sys.version_info))
	# BUG FIX: these two lines were plain strings missing the f prefix, so the
	# literal text "{aiohttp.__version__}" / "{py_version}" was sent upstream.
	user_agent = (
		'pleroma-ebooks (https://lab.freak.university/KayFaraday/pleroma-ebooks); '
		f'aiohttp/{aiohttp.__version__}; '
		f'python/{py_version}'
	)
	return aiohttp.ClientSession(
		headers={'User-Agent': user_agent, **headers},
	)
|
||||
|
||||
class BadRequest(Exception):
	"""Raised when the server answers an API request with HTTP 400."""
|
||||
|
||||
class LoginFailed(Exception):
	"""Raised when credential verification does not yield an account ID."""
|
||||
|
||||
class Pleroma:
	"""Minimal asynchronous client for the Mastodon/Pleroma REST API.

	Use as an async context manager so the underlying HTTP session is closed.
	"""

	def __init__(self, *, api_base_url, access_token):
		self.api_base_url = api_base_url.rstrip('/')
		self.access_token = access_token.strip()
		self._session = http_session_factory({'Authorization': 'Bearer ' + self.access_token})
		# cached account ID of the logged-in user (filled lazily)
		self._logged_in_id = None

	async def __aenter__(self):
		self._session = await self._session.__aenter__()
		return self

	async def __aexit__(self, *excinfo):
		return await self._session.__aexit__(*excinfo)

	async def request(self, method, path, **kwargs):
		"""Send an API request and return the decoded JSON body.

		Raises BadRequest (with the server's error message) on HTTP 400.
		"""
		# blocklist of some horrible instances; hostnames are stored as
		# salted SHA-256 hashes so they don't appear in the source
		if hashlib.sha256(
			yarl.URL(self.api_base_url).host.encode()
			+ bytes.fromhex('d590e3c48d599db6776e89dfc8ebaf53c8cd84866a76305049d8d8c5d4126ce1')
		).hexdigest() in {
			'56704d4d95b882e81c8e7765e9079be0afc4e353925ba9add8fd65976f52db83',
			'1932431fa41a0baaccce7815115b01e40e0237035bb155713712075b887f5a19',
			'a42191105a9f3514a1d5131969c07a95e06d0fdf0058f18e478823bf299881c9',
		}:
			raise RuntimeError('stop being a chud')

		async with self._session.request(method, self.api_base_url + path, **kwargs) as resp:
			if resp.status == HTTPStatus.BAD_REQUEST:
				raise BadRequest((await resp.json())['error'])
			return await resp.json()

	async def verify_credentials(self):
		return await self.request('GET', '/api/v1/accounts/verify_credentials')

	# convenience alias: "who am I"
	me = verify_credentials

	async def _get_logged_in_id(self):
		"""Return (and cache) the account ID of the logged-in user.

		Raises LoginFailed if the credentials response has no ``id``.
		"""
		if self._logged_in_id is not None:
			return self._logged_in_id

		me = await self.me()

		try:
			self._logged_in_id = me['id']
		except KeyError:
			raise LoginFailed(me)

		return self._logged_in_id

	async def following(self, account_id=None):
		"""List accounts followed by ``account_id`` (default: the logged-in user)."""
		account_id = account_id or await self._get_logged_in_id()
		return await self.request('GET', f'/api/v1/accounts/{account_id}/following')

	@staticmethod
	def _unpack_id(obj):
		# accept either an API object (dict with an 'id' key) or a bare ID
		if isinstance(obj, dict) and 'id' in obj:
			return obj['id']
		return obj

	async def status_context(self, id):
		id = self._unpack_id(id)
		return await self.request('GET', f'/api/v1/statuses/{id}/context')

	async def post(self, content, *, in_reply_to_id=None, cw=None, visibility=None, media_ids=()):
		"""Create a status.

		Raises ValueError for an unknown visibility value.
		"""
		if visibility not in {None, 'private', 'public', 'unlisted', 'direct'}:
			raise ValueError('invalid visibility', visibility)

		data = MultiDict(status=content)
		if in_reply_to_id := self._unpack_id(in_reply_to_id):
			data['in_reply_to_id'] = in_reply_to_id
		if visibility is not None:
			data['visibility'] = visibility
		# normally, this would be a check against None.
		# however, apparently Pleroma serializes posts without CWs as posts with an empty string
		# as a CW, so per the robustness principle we'll accept that too.
		if cw:
			data['spoiler_text'] = cw

		for media_id in media_ids:
			data.add('media_ids[]', media_id)

		return await self.request('POST', '/api/v1/statuses', data=data)

	async def post_media(self, fp, mime_type, filename=None, description=None, focus=None):
		"""Upload a media file; returns the created media attachment object."""
		data = aiohttp.FormData()
		data.add_field('file', fp, content_type=mime_type, filename=filename)
		if description is not None:
			data.add_field('description', description)
		if focus is not None:
			# BUG FIX: the field name was missing here, so the focal point was
			# sent without a name and silently ignored by the server
			data.add_field('focus', ','.join(map(str, focus)))
		return await self.request('POST', '/api/v2/media', data=data)

	async def reply(self, to_status, content, *, cw=None):
		"""Reply to a status, mentioning its author and everyone it mentioned
		(except ourselves), inheriting visibility (public is downgraded to
		unlisted) and the CW (prefixed with "re: ") unless ``cw`` is given.
		"""
		user_id = await self._get_logged_in_id()

		# insertion-ordered dict keyed by account ID to de-duplicate mentions
		mentioned_accounts = {}
		mentioned_accounts[to_status['account']['id']] = to_status['account']['acct']
		for account in to_status['mentions']:
			if account['id'] != user_id and account['id'] not in mentioned_accounts:
				mentioned_accounts[account['id']] = account['acct']

		content = ''.join('@' + x + ' ' for x in mentioned_accounts.values()) + content

		visibility = 'unlisted' if to_status['visibility'] == 'public' else to_status['visibility']
		if not cw and 'spoiler_text' in to_status and to_status['spoiler_text']:
			cw = 're: ' + to_status['spoiler_text']

		return await self.post(content, in_reply_to_id=to_status['id'], cw=cw, visibility=visibility)

	async def favorite(self, id):
		id = self._unpack_id(id)
		return await self.request('POST', f'/api/v1/statuses/{id}/favourite')

	async def unfavorite(self, id):
		id = self._unpack_id(id)
		return await self.request('POST', f'/api/v1/statuses/{id}/unfavourite')

	async def react(self, id, reaction):
		id = self._unpack_id(id)
		return await self.request('PUT', f'/api/v1/pleroma/statuses/{id}/reactions/{reaction}')

	async def remove_reaction(self, id, reaction):
		id = self._unpack_id(id)
		return await self.request('DELETE', f'/api/v1/pleroma/statuses/{id}/reactions/{reaction}')

	async def pin(self, id):
		id = self._unpack_id(id)
		return await self.request('POST', f'/api/v1/statuses/{id}/pin')

	async def unpin(self, id):
		id = self._unpack_id(id)
		return await self.request('POST', f'/api/v1/statuses/{id}/unpin')

	async def stream(self, stream_name, *, target_event_type=None):
		"""Yield events from the streaming API, optionally filtered by type."""
		async with self._session.ws_connect(
			self.api_base_url + f'/api/v1/streaming?stream={stream_name}&access_token={self.access_token}'
		) as ws:
			async for msg in ws:
				if msg.type == aiohttp.WSMsgType.TEXT:
					event = msg.json()
					# the only event type that doesn't define `payload` is `filters_changed`
					if event['event'] == 'filters_changed':
						yield event
					elif target_event_type is None or event['event'] == target_event_type:
						# don't ask me why the payload is also JSON encoded smh
						yield json.loads(event['payload'])

	async def stream_notifications(self):
		async for notif in self.stream('user:notification', target_event_type='notification'):
			yield notif

	async def stream_mentions(self):
		async for notif in self.stream_notifications():
			if notif['type'] == 'mention':
				yield notif
|
|
@ -0,0 +1,5 @@
|
|||
aiohttp ~= 3.0
|
||||
qtoml ~= 0.3.1
|
||||
anyio ~= 3.0
|
||||
aiosqlite ~= 0.17.0
|
||||
pendulum ~= 2.0
|
|
@ -0,0 +1,21 @@
|
|||
# SPDX-License-Identifier: AGPL-3.0-only
|
||||
|
||||
import anyio
|
||||
import contextlib
|
||||
from functools import wraps
|
||||
|
||||
def as_corofunc(f):
	"""Wrap the synchronous callable *f* so it can be awaited.

	The wrapper simply calls *f* and returns its result; no checkpoint
	(e.g. an ``anyio.sleep(0)``) is inserted.
	"""
	@wraps(f)
	async def awaitable_f(*args, **kwargs):
		return f(*args, **kwargs)
	return awaitable_f
|
||||
|
||||
def as_async_cm(cls):
	"""Derive an async context manager class from synchronous CM class *cls*.

	The returned class behaves exactly like *cls* but additionally supports
	``async with``, delegating to the synchronous __enter__/__exit__.
	"""
	@wraps(cls, updated=())  # cls.__dict__ doesn't support .update()
	class async_cm(cls, contextlib.AbstractAsyncContextManager):
		__aenter__ = as_corofunc(cls.__enter__)
		__aexit__ = as_corofunc(cls.__exit__)
	return async_cm
|
||||
|
||||
# async-compatible version of contextlib.suppress, usable as `async with suppress(...)`
suppress = as_async_cm(contextlib.suppress)
|
Loading…
Reference in New Issue