From b73526a895cfbcc96b2f40340aeb84288318485c Mon Sep 17 00:00:00 2001 From: Kay Faraday Date: Wed, 15 Jun 2022 19:53:43 +0000 Subject: [PATCH] simplify account fetch by removing the WebFinger step Now we fetch the user's profile URL from our home server, skipping the need to WebFinger them and also supporting instances that don't support http:// connections. --- fetch_posts.py | 49 +++++++++++++++---------------------------------- 1 file changed, 15 insertions(+), 34 deletions(-) diff --git a/fetch_posts.py b/fetch_posts.py index c7e8c9b..d84809f 100755 --- a/fetch_posts.py +++ b/fetch_posts.py @@ -75,32 +75,24 @@ class PostFetcher: async def __aexit__(self, *excinfo): return await self._ctx_stack.__aexit__(*excinfo) - # username@instance - AccountHandle = NewType('AccountHandle', str) async def fetch_all(self): """fetch all following accounts, or an iterable of accounts if provided""" await self._fedi.verify_credentials() self._completed_accounts = {} async with anyio.create_task_group() as tg: - for fqn in map(self.fqn, await self._fedi.following()): - tg.start_soon(self._do_account, fqn) + # XXX it's assumed that no more than one API page of people are being followed at one time + for account in await self._fedi.following(): + profile_url = account['url'] + tg.start_soon(self._do_account, profile_url) - def fqn(self, acc: dict): - try: - return acc['fqn'] - except KeyError: - fqn = acc['acct'] - if '@' in fqn: return fqn - return fqn + '@' + URL(self.config['site']).host - - async def _do_account(self, acc: AccountHandle): + async def _do_account(self, profile_url: str): async with anyio.create_task_group() as tg: - self._completed_accounts[acc] = done_ev = anyio.Event() + self._completed_accounts[profile_url] = done_ev = anyio.Event() tx, rx = anyio.create_memory_object_stream() async with rx, tx: - tg.start_soon(self._process_pages, rx, acc) - tg.start_soon(self._fetch_account, tx, acc) + tg.start_soon(self._process_pages, rx, profile_url) + tg.start_soon(self._fetch_account, tx, profile_url) await done_ev.wait() # processing is complete, so halt fetching. # processing may complete before fetching if we get caught up on new posts. @@ -154,19 +146,19 @@ class PostFetcher: # TODO figure out why i put shield here lol @shield - async def _fetch_account(self, tx, account: AccountHandle): - done_ev = self._completed_accounts[account] + async def _fetch_account(self, tx, profile_url): + done_ev = self._completed_accounts[profile_url] try: - outbox = await self.fetch_outbox(account) + outbox = await self.fetch_outbox(profile_url) except Exception as exc: import traceback traceback.print_exception(type(exc), exc, exc.__traceback__) done_ev.set() - self.erroneous_accounts.append(account) + self.erroneous_accounts.append(profile_url) return - print(f'Fetching posts for {account}...') + print(f'Fetching posts for {profile_url}...') next_page_url = outbox['first'] while True: @@ -189,19 +181,8 @@ class PostFetcher: done_ev.set() - async def fetch_outbox(self, handle): - """finger handle, a fully-qualified ActivityPub actor name, returning their outbox URL""" - # it's fucking incredible how overengineered ActivityPub is btw - print('Fingering ', handle, '...', sep='') - - username, at, instance = handle.lstrip('@').partition('@') - assert at == '@' - - # i was planning on doing /.well-known/host-meta to find the webfinger URL, but - # 1) honk does not support host-meta - # 2) WebFinger is always located at the same location anyway - - profile_url = await self._finger_actor(username, instance) + async def fetch_outbox(self, profile_url): + """fetch the first page of the outbox for the given ActivityPub profile URL""" try: async with self._http.get(profile_url) as resp: profile = await resp.json()