simplify account fetch by removing the WebFinger step

We now take each followed account's profile URL directly from our home server, which removes the need to WebFinger the account
and also adds support for instances that don't support http:// connections.
This commit is contained in:
Kay Faraday 2022-06-15 19:53:43 +00:00
parent 61e05de199
commit b73526a895
1 changed file with 15 additions and 34 deletions

View File

@ -75,32 +75,24 @@ class PostFetcher:
async def __aexit__(self, *excinfo): async def __aexit__(self, *excinfo):
return await self._ctx_stack.__aexit__(*excinfo) return await self._ctx_stack.__aexit__(*excinfo)
# username@instance
AccountHandle = NewType('AccountHandle', str)
async def fetch_all(self): async def fetch_all(self):
"""fetch all following accounts, or an iterable of accounts if provided""" """fetch all following accounts, or an iterable of accounts if provided"""
await self._fedi.verify_credentials() await self._fedi.verify_credentials()
self._completed_accounts = {} self._completed_accounts = {}
async with anyio.create_task_group() as tg: async with anyio.create_task_group() as tg:
for fqn in map(self.fqn, await self._fedi.following()): # XXX it's assumed that no more than one API page of people are being followed at one time
tg.start_soon(self._do_account, fqn) for account in await self._fedi.following():
profile_url = account['url']
tg.start_soon(self._do_account, profile_url)
def fqn(self, acc: dict): async def _do_account(self, profile_url: str):
try:
return acc['fqn']
except KeyError:
fqn = acc['acct']
if '@' in fqn: return fqn
return fqn + '@' + URL(self.config['site']).host
async def _do_account(self, acc: AccountHandle):
async with anyio.create_task_group() as tg: async with anyio.create_task_group() as tg:
self._completed_accounts[acc] = done_ev = anyio.Event() self._completed_accounts[profile_url] = done_ev = anyio.Event()
tx, rx = anyio.create_memory_object_stream() tx, rx = anyio.create_memory_object_stream()
async with rx, tx: async with rx, tx:
tg.start_soon(self._process_pages, rx, acc) tg.start_soon(self._process_pages, rx, profile_url)
tg.start_soon(self._fetch_account, tx, acc) tg.start_soon(self._fetch_account, tx, profile_url)
await done_ev.wait() await done_ev.wait()
# processing is complete, so halt fetching. # processing is complete, so halt fetching.
# processing may complete before fetching if we get caught up on new posts. # processing may complete before fetching if we get caught up on new posts.
@ -154,19 +146,19 @@ class PostFetcher:
# TODO figure out why i put shield here lol # TODO figure out why i put shield here lol
@shield @shield
async def _fetch_account(self, tx, account: AccountHandle): async def _fetch_account(self, tx, profile_url):
done_ev = self._completed_accounts[account] done_ev = self._completed_accounts[profile_url]
try: try:
outbox = await self.fetch_outbox(account) outbox = await self.fetch_outbox(profile_url)
except Exception as exc: except Exception as exc:
import traceback import traceback
traceback.print_exception(type(exc), exc, exc.__traceback__) traceback.print_exception(type(exc), exc, exc.__traceback__)
done_ev.set() done_ev.set()
self.erroneous_accounts.append(account) self.erroneous_accounts.append(profile_url)
return return
print(f'Fetching posts for {account}...') print(f'Fetching posts for {profile_url}...')
next_page_url = outbox['first'] next_page_url = outbox['first']
while True: while True:
@ -189,19 +181,8 @@ class PostFetcher:
done_ev.set() done_ev.set()
async def fetch_outbox(self, handle): async def fetch_outbox(self, profile_url):
"""finger handle, a fully-qualified ActivityPub actor name, returning their outbox URL""" """fetch the first page of the outbox for the given ActivityPub profile URL"""
# it's fucking incredible how overengineered ActivityPub is btw
print('Fingering ', handle, '...', sep='')
username, at, instance = handle.lstrip('@').partition('@')
assert at == '@'
# i was planning on doing /.well-known/host-meta to find the webfinger URL, but
# 1) honk does not support host-meta
# 2) WebFinger is always located at the same location anyway
profile_url = await self._finger_actor(username, instance)
try: try:
async with self._http.get(profile_url) as resp: profile = await resp.json() async with self._http.get(profile_url) as resp: profile = await resp.json()