diff --git a/fetch_posts.py b/fetch_posts.py index c7e8c9b..d84809f 100755 --- a/fetch_posts.py +++ b/fetch_posts.py @@ -75,32 +75,24 @@ class PostFetcher: async def __aexit__(self, *excinfo): return await self._ctx_stack.__aexit__(*excinfo) - # username@instance - AccountHandle = NewType('AccountHandle', str) async def fetch_all(self): """fetch all following accounts, or an iterable of accounts if provided""" await self._fedi.verify_credentials() self._completed_accounts = {} async with anyio.create_task_group() as tg: - for fqn in map(self.fqn, await self._fedi.following()): - tg.start_soon(self._do_account, fqn) + # XXX it's assumed that no more than one API page of people are being followed at one time + for account in await self._fedi.following(): + profile_url = account['url'] + tg.start_soon(self._do_account, profile_url) - def fqn(self, acc: dict): - try: - return acc['fqn'] - except KeyError: - fqn = acc['acct'] - if '@' in fqn: return fqn - return fqn + '@' + URL(self.config['site']).host - - async def _do_account(self, acc: AccountHandle): + async def _do_account(self, profile_url: str): async with anyio.create_task_group() as tg: - self._completed_accounts[acc] = done_ev = anyio.Event() + self._completed_accounts[profile_url] = done_ev = anyio.Event() tx, rx = anyio.create_memory_object_stream() async with rx, tx: - tg.start_soon(self._process_pages, rx, acc) - tg.start_soon(self._fetch_account, tx, acc) + tg.start_soon(self._process_pages, rx, profile_url) + tg.start_soon(self._fetch_account, tx, profile_url) await done_ev.wait() # processing is complete, so halt fetching. # processing may complete before fetching if we get caught up on new posts. @@ -154,19 +146,19 @@ class PostFetcher: # TODO figure out why i put shield here lol @shield - async def _fetch_account(self, tx, account: AccountHandle): - done_ev = self._completed_accounts[account] + async def _fetch_account(self, tx, profile_url): + done_ev = self._completed_accounts[profile_url] try: - outbox = await self.fetch_outbox(account) + outbox = await self.fetch_outbox(profile_url) except Exception as exc: import traceback traceback.print_exception(type(exc), exc, exc.__traceback__) done_ev.set() - self.erroneous_accounts.append(account) + self.erroneous_accounts.append(profile_url) return - print(f'Fetching posts for {account}...') + print(f'Fetching posts for {profile_url}...') next_page_url = outbox['first'] while True: @@ -189,19 +181,8 @@ class PostFetcher: done_ev.set() - async def fetch_outbox(self, handle): - """finger handle, a fully-qualified ActivityPub actor name, returning their outbox URL""" - # it's fucking incredible how overengineered ActivityPub is btw - print('Fingering ', handle, '...', sep='') - - username, at, instance = handle.lstrip('@').partition('@') - assert at == '@' - - # i was planning on doing /.well-known/host-meta to find the webfinger URL, but - # 1) honk does not support host-meta - # 2) WebFinger is always located at the same location anyway - - profile_url = await self._finger_actor(username, instance) + async def fetch_outbox(self, profile_url): + """fetch the first page of the outbox for the given ActivityPub profile URL""" try: async with self._http.get(profile_url) as resp: profile = await resp.json()