mirror all posts since last ran (fixes #1)
This commit is contained in:
parent
da515baf0c
commit
40e3867f82
|
@ -4,6 +4,7 @@
|
||||||
import io
|
import io
|
||||||
import sys
|
import sys
|
||||||
import anyio
|
import anyio
|
||||||
|
import cursor
|
||||||
import aiohttp
|
import aiohttp
|
||||||
import pleroma
|
import pleroma
|
||||||
import argparse
|
import argparse
|
||||||
|
@ -12,7 +13,7 @@ import pendulum
|
||||||
import aiosqlite
|
import aiosqlite
|
||||||
import contextlib
|
import contextlib
|
||||||
import qtoml as toml
|
import qtoml as toml
|
||||||
from utils import suppress
|
from utils import suppress, loading_spinner
|
||||||
from pleroma import Pleroma
|
from pleroma import Pleroma
|
||||||
from functools import partial
|
from functools import partial
|
||||||
|
|
||||||
|
@ -26,8 +27,6 @@ UTC = pendulum.timezone('UTC')
|
||||||
JSON_CONTENT_TYPE = 'application/json'
|
JSON_CONTENT_TYPE = 'application/json'
|
||||||
ACTIVITYPUB_CONTENT_TYPE = 'application/activity+json'
|
ACTIVITYPUB_CONTENT_TYPE = 'application/activity+json'
|
||||||
|
|
||||||
MIGRATION_VERSION = 1
|
|
||||||
|
|
||||||
class PostMirror:
|
class PostMirror:
|
||||||
def __init__(self, *, config):
|
def __init__(self, *, config):
|
||||||
self.config = config
|
self.config = config
|
||||||
|
@ -54,38 +53,73 @@ class PostMirror:
|
||||||
return await self._ctx_stack.__aexit__(*excinfo)
|
return await self._ctx_stack.__aexit__(*excinfo)
|
||||||
|
|
||||||
async def mirror_posts(self):
|
async def mirror_posts(self):
|
||||||
|
spinner = loading_spinner()
|
||||||
outbox = await self.fetch_outbox(self.config['account'])
|
outbox = await self.fetch_outbox(self.config['account'])
|
||||||
async with self._http.get(outbox['first']) as resp: page = await resp.json()
|
|
||||||
last_post = page['orderedItems'][0]['object']
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
with open(self.config['timestamp_path']) as f:
|
with open(self.config['timestamp_path']) as f:
|
||||||
last_mirrored_ts = pendulum.from_timestamp(float(f.read()))
|
last_mirrored_at = pendulum.from_timestamp(float(f.read()))
|
||||||
except FileNotFoundError:
|
except FileNotFoundError:
|
||||||
last_mirrored_ts = pendulum.from_timestamp(0.0)
|
last_mirrored_at = pendulum.from_timestamp(0.0)
|
||||||
|
|
||||||
last_post_ts = pendulum.parse(last_post['published'])
|
page_url = outbox['first']
|
||||||
|
posts = []
|
||||||
|
print('Fetching posts to mirror...', end=' ')
|
||||||
|
cursor.hide()
|
||||||
|
done = False
|
||||||
|
while not done:
|
||||||
|
async with self._http.get(page_url) as resp: page = await resp.json()
|
||||||
|
try:
|
||||||
|
page_url = page['next']
|
||||||
|
except KeyError:
|
||||||
|
done = True
|
||||||
|
|
||||||
if last_post_ts < last_mirrored_ts:
|
print(next(spinner), end='', flush=True)
|
||||||
|
|
||||||
|
for item in page['orderedItems']:
|
||||||
|
post = item['object']
|
||||||
|
published_at = pendulum.parse(post['published'])
|
||||||
|
if published_at < last_mirrored_at:
|
||||||
|
done = True
|
||||||
|
break
|
||||||
|
posts.append(post)
|
||||||
|
|
||||||
|
print()
|
||||||
|
cursor.show()
|
||||||
|
|
||||||
|
if not posts:
|
||||||
print('Nothing to do')
|
print('Nothing to do')
|
||||||
return
|
return
|
||||||
|
|
||||||
# mirror the post and all its attachments
|
print('Mirroring posts...', end=' ')
|
||||||
attachments = [None] * len(last_post['attachment'])
|
cursor.hide()
|
||||||
async with anyio.create_task_group() as tg:
|
for post in reversed(posts): # oldest to newest
|
||||||
for i, attachment in enumerate(last_post['attachment']):
|
# we use for ... await instead of a task group in order to ensure order is preserved
|
||||||
tg.start_soon(self._mirror_attachment, i, attachments, attachment)
|
# TODO mirror all attachments (from all posts) in parallel
|
||||||
assert None not in attachments
|
await self._mirror_post(post)
|
||||||
await self._fedi.post(
|
print(next(spinner), end='', flush=True)
|
||||||
last_post['source'],
|
|
||||||
cw=last_post['summary'],
|
print()
|
||||||
visibility='unlisted',
|
cursor.show()
|
||||||
media_ids=attachments,
|
|
||||||
)
|
|
||||||
|
|
||||||
with open(self.config['timestamp_path'], 'w') as f:
|
with open(self.config['timestamp_path'], 'w') as f:
|
||||||
f.write(str(pendulum.now('UTC').timestamp()))
|
f.write(str(pendulum.now('UTC').timestamp()))
|
||||||
|
|
||||||
|
async def _mirror_post(self, post):
|
||||||
|
attachments = [None] * len(post['attachment'])
|
||||||
|
async with anyio.create_task_group() as tg:
|
||||||
|
for i, attachment in enumerate(post['attachment']):
|
||||||
|
tg.start_soon(self._mirror_attachment, i, attachments, attachment)
|
||||||
|
|
||||||
|
assert None not in attachments
|
||||||
|
|
||||||
|
await self._fedi.post(
|
||||||
|
post['source'],
|
||||||
|
cw=post['summary'],
|
||||||
|
visibility='unlisted',
|
||||||
|
media_ids=attachments,
|
||||||
|
)
|
||||||
|
|
||||||
async def _mirror_attachment(self, i, out_attachments, attachment):
|
async def _mirror_attachment(self, i, out_attachments, attachment):
|
||||||
async with self._http.get(attachment['url']) as resp:
|
async with self._http.get(attachment['url']) as resp:
|
||||||
data = await resp.read()
|
data = await resp.read()
|
||||||
|
@ -168,6 +202,7 @@ def main():
|
||||||
try:
|
try:
|
||||||
anyio.run(amain)
|
anyio.run(amain)
|
||||||
except KeyboardInterrupt:
|
except KeyboardInterrupt:
|
||||||
|
cursor.show()
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
|
|
|
@ -3,3 +3,4 @@ qtoml ~= 0.3.1
|
||||||
anyio ~= 3.0
|
anyio ~= 3.0
|
||||||
aiosqlite ~= 0.17.0
|
aiosqlite ~= 0.17.0
|
||||||
pendulum ~= 2.0
|
pendulum ~= 2.0
|
||||||
|
cursor ~= 1.3
|
||||||
|
|
15
utils.py
15
utils.py
|
@ -1,6 +1,7 @@
|
||||||
# SPDX-License-Identifier: AGPL-3.0-only
|
# SPDX-License-Identifier: AGPL-3.0-only
|
||||||
|
|
||||||
import anyio
|
import anyio
|
||||||
|
import itertools
|
||||||
import contextlib
|
import contextlib
|
||||||
from functools import wraps
|
from functools import wraps
|
||||||
|
|
||||||
|
@ -19,3 +20,17 @@ def as_async_cm(cls):
|
||||||
return wrapped
|
return wrapped
|
||||||
|
|
||||||
suppress = as_async_cm(contextlib.suppress)
|
suppress = as_async_cm(contextlib.suppress)
|
||||||
|
|
||||||
|
def loading_spinner():
|
||||||
|
return itertools.cycle('\b' + x for x in [
|
||||||
|
'⠋',
|
||||||
|
'⠙',
|
||||||
|
'⠹',
|
||||||
|
'⠸',
|
||||||
|
'⠼',
|
||||||
|
'⠴',
|
||||||
|
'⠦',
|
||||||
|
'⠧',
|
||||||
|
'⠇',
|
||||||
|
'⠏',
|
||||||
|
])
|
||||||
|
|
Loading…
Reference in New Issue