simplify account fetch by removing the WebFinger step

We now fetch each user's profile URL from our home server, which removes the need to WebFinger them
and also lets us support instances that don't support http:// connections.
This commit is contained in:
Kay Faraday 2022-06-15 19:53:43 +00:00
parent 61e05de199
commit b73526a895
1 changed file with 15 additions and 34 deletions

View File

@ -75,32 +75,24 @@ class PostFetcher:
async def __aexit__(self, *excinfo):
return await self._ctx_stack.__aexit__(*excinfo)
# username@instance
AccountHandle = NewType('AccountHandle', str)
async def fetch_all(self):
"""fetch all following accounts, or an iterable of accounts if provided"""
await self._fedi.verify_credentials()
self._completed_accounts = {}
async with anyio.create_task_group() as tg:
for fqn in map(self.fqn, await self._fedi.following()):
tg.start_soon(self._do_account, fqn)
# XXX it's assumed that no more than one API page of people are being followed at one time
for account in await self._fedi.following():
profile_url = account['url']
tg.start_soon(self._do_account, profile_url)
def fqn(self, acc: dict):
try:
return acc['fqn']
except KeyError:
fqn = acc['acct']
if '@' in fqn: return fqn
return fqn + '@' + URL(self.config['site']).host
async def _do_account(self, acc: AccountHandle):
async def _do_account(self, profile_url: str):
async with anyio.create_task_group() as tg:
self._completed_accounts[acc] = done_ev = anyio.Event()
self._completed_accounts[profile_url] = done_ev = anyio.Event()
tx, rx = anyio.create_memory_object_stream()
async with rx, tx:
tg.start_soon(self._process_pages, rx, acc)
tg.start_soon(self._fetch_account, tx, acc)
tg.start_soon(self._process_pages, rx, profile_url)
tg.start_soon(self._fetch_account, tx, profile_url)
await done_ev.wait()
# processing is complete, so halt fetching.
# processing may complete before fetching if we get caught up on new posts.
@ -154,19 +146,19 @@ class PostFetcher:
# TODO figure out why i put shield here lol
@shield
async def _fetch_account(self, tx, account: AccountHandle):
done_ev = self._completed_accounts[account]
async def _fetch_account(self, tx, profile_url):
done_ev = self._completed_accounts[profile_url]
try:
outbox = await self.fetch_outbox(account)
outbox = await self.fetch_outbox(profile_url)
except Exception as exc:
import traceback
traceback.print_exception(type(exc), exc, exc.__traceback__)
done_ev.set()
self.erroneous_accounts.append(account)
self.erroneous_accounts.append(profile_url)
return
print(f'Fetching posts for {account}...')
print(f'Fetching posts for {profile_url}...')
next_page_url = outbox['first']
while True:
@ -189,19 +181,8 @@ class PostFetcher:
done_ev.set()
async def fetch_outbox(self, handle):
"""finger handle, a fully-qualified ActivityPub actor name, returning their outbox URL"""
# it's fucking incredible how overengineered ActivityPub is btw
print('Fingering ', handle, '...', sep='')
username, at, instance = handle.lstrip('@').partition('@')
assert at == '@'
# i was planning on doing /.well-known/host-meta to find the webfinger URL, but
# 1) honk does not support host-meta
# 2) WebFinger is always located at the same location anyway
profile_url = await self._finger_actor(username, instance)
async def fetch_outbox(self, profile_url):
"""fetch the first page of the outbox for the given ActivityPub profile URL"""
try:
async with self._http.get(profile_url) as resp: profile = await resp.json()