forked from KayFaraday/pleroma-ebooks
fix for pleroma
This commit is contained in:
parent
a1324acfba
commit
eeba1c9066
120
main.py
120
main.py
|
@ -14,16 +14,16 @@ scopes = ["read:statuses", "read:accounts", "read:follows", "write:statuses"]
|
||||||
# Load the bot configuration. The context manager closes the file handle
# as soon as parsing is done instead of leaving it to garbage collection.
with open('config.json', 'r') as config_file:
    cfg = json.load(config_file)

# One-time migration: if the separate *.secret credential files exist, fold
# their contents into cfg and delete the files.
if os.path.exists("clientcred.secret"):
    print("Upgrading to new storage method")
    # clientcred.secret: first line is used as the client id, second line
    # as the client secret.
    with open("clientcred.secret") as cred_file:
        cc = cred_file.read().split("\n")
    cfg['client'] = {
        "id": cc[0],
        "secret": cc[1]
    }
    # usercred.secret: whole content, minus trailing newlines, is the secret.
    with open("usercred.secret") as cred_file:
        cfg['secret'] = cred_file.read().rstrip("\n")
    # Both handles are closed by now, so removal cannot be blocked by our
    # own open file handles.
    os.remove("clientcred.secret")
    os.remove("usercred.secret")
|
||||||
|
|
||||||
|
|
||||||
if "client" not in cfg:
|
if "client" not in cfg:
|
||||||
print("No client credentials, registering application")
|
print("No client credentials, registering application")
|
||||||
|
@ -105,6 +105,26 @@ def handleCtrlC(signal, frame):
|
||||||
|
|
||||||
signal.signal(signal.SIGINT, handleCtrlC)
|
signal.signal(signal.SIGINT, handleCtrlC)
|
||||||
|
|
||||||
|
def get_toots_legacy(client, id):
    """Yield an account's usable toots by paging through the API client.

    A status is skipped when it has a non-empty ``spoiler_text``, when its
    ``reblog`` attribute is set, or when its visibility is anything other
    than ``"public"`` or ``"unlisted"``.  The remaining statuses are passed
    through ``extract_toot``; results of ``None`` are dropped.

    Args:
        client: Object providing ``account_statuses(id)`` and
            ``fetch_next(page)``.  (presumably a Mastodon.py client --
            TODO confirm against the caller)
        id: Account id, handed unchanged to ``client.account_statuses``.

    Yields:
        dict: ``{"toot": <extract_toot result>, "id": <status id>,
        "uri": <status uri>}`` for each accepted status.
    """
    pages_seen = 0
    toots = client.account_statuses(id)
    # Keep going until the client hands back None or an empty page.
    while toots is not None and len(toots) > 0:
        for toot in toots:
            if toot.spoiler_text != "":
                continue  # content-warned
            if toot.reblog is not None:
                continue  # a reblog, not the account's own text
            if toot.visibility not in ("public", "unlisted"):
                continue
            t = extract_toot(toot.content)
            if t is not None:
                yield {
                    "toot": t,
                    "id": toot.id,
                    "uri": toot.uri,
                }
        # Advance once per page, after the whole page has been consumed.
        toots = client.fetch_next(toots)
        pages_seen += 1
        # Progress indicator: one dot for every 20 pages fetched.
        if pages_seen % 20 == 0:
            print('.', end='', flush=True)
|
||||||
|
|
||||||
for f in following:
|
for f in following:
|
||||||
last_toot = c.execute("SELECT id FROM `toots` WHERE userid LIKE ? ORDER BY id DESC LIMIT 1", (f.id,)).fetchone()
|
last_toot = c.execute("SELECT id FROM `toots` WHERE userid LIKE ? ORDER BY id DESC LIMIT 1", (f.id,)).fetchone()
|
||||||
if last_toot != None:
|
if last_toot != None:
|
||||||
|
@ -114,7 +134,7 @@ for f in following:
|
||||||
print("Harvesting toots for user @{}, starting from {}".format(f.acct, last_toot))
|
print("Harvesting toots for user @{}, starting from {}".format(f.acct, last_toot))
|
||||||
|
|
||||||
#find the user's activitypub outbox
|
#find the user's activitypub outbox
|
||||||
#print("WebFingering...")
|
print("WebFingering...")
|
||||||
instance = re.search(r"^.*@(.+)", f.acct)
|
instance = re.search(r"^.*@(.+)", f.acct)
|
||||||
if instance == None:
|
if instance == None:
|
||||||
instance = re.search(r"https?:\/\/(.*)", cfg['site']).group(1)
|
instance = re.search(r"https?:\/\/(.*)", cfg['site']).group(1)
|
||||||
|
@ -124,49 +144,73 @@ for f in following:
|
||||||
if instance == "bofa.lol":
|
if instance == "bofa.lol":
|
||||||
print("rest in piece bofa, skipping")
|
print("rest in piece bofa, skipping")
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# print("{} is on {}".format(f.acct, instance))
|
# print("{} is on {}".format(f.acct, instance))
|
||||||
try:
|
try:
|
||||||
r = requests.get("https://{}/.well-known/host-meta".format(instance))
|
r = requests.get("https://{}/.well-known/host-meta".format(instance))
|
||||||
uri = re.search(r'template="([^"]+)"', r.text).group(1)
|
uri = re.search(r'template="([^"]+)"', r.text).group(1)
|
||||||
uri = uri.format(uri = "{}@{}".format(f.username, instance))
|
uri = uri.format(uri = "{}@{}".format(f.username, instance))
|
||||||
r = requests.get(uri)
|
r = requests.get(uri, headers={"Accept": "application/json"})
|
||||||
uri = r.json()['aliases'][1] #TODO: find out if it's safe to rely on this
|
j = r.json()
|
||||||
|
if len(j['aliases']) == 1: #TODO: this is a hack on top of a hack, fix it
|
||||||
|
uri = j['aliases'][0]
|
||||||
|
else:
|
||||||
|
uri = j['aliases'][1]
|
||||||
uri = "{}/outbox?page=true&min_id={}".format(uri, last_toot)
|
uri = "{}/outbox?page=true&min_id={}".format(uri, last_toot)
|
||||||
r = requests.get(uri)
|
r = requests.get(uri)
|
||||||
j = r.json()
|
j = r.json()
|
||||||
except Exception:
|
except Exception:
|
||||||
print("oopsy woopsy!! we made a fucky wucky!!!\n(we're probably rate limited, please hang up and try again)")
|
print("oopsy woopsy!! we made a fucky wucky!!!\n(we're probably rate limited, please hang up and try again)")
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
|
|
||||||
|
pleroma = False
|
||||||
|
if 'first' in j:
|
||||||
|
print("{} is a pleroma instance -- falling back to legacy toot collection method".format(instance))
|
||||||
|
pleroma = True
|
||||||
|
|
||||||
print("Downloading and parsing toots", end='', flush=True)
|
print("Downloading and parsing toots", end='', flush=True)
|
||||||
current = None
|
current = None
|
||||||
try:
|
try:
|
||||||
while len(j['orderedItems']) > 0:
|
if pleroma:
|
||||||
for oi in j['orderedItems']:
|
for t in get_toots_legacy(client, f.id):
|
||||||
if oi['type'] == "Create":
|
try:
|
||||||
# its a toost baby
|
c.execute("REPLACE INTO toots (id, userid, uri, content) VALUES (?, ?, ?, ?)",
|
||||||
content = oi['object']['content']
|
(t['id'],
|
||||||
if oi['object']['summary'] != None:
|
f.id,
|
||||||
#don't download CW'd toots
|
t['uri'],
|
||||||
continue
|
t['toot']
|
||||||
toot = extract_toot(content)
|
|
||||||
# print(toot)
|
|
||||||
try:
|
|
||||||
c.execute("REPLACE INTO toots (id, userid, uri, content) VALUES (?, ?, ?, ?)",
|
|
||||||
(re.search(r"[^\/]+$", oi['object']['id']).group(0),
|
|
||||||
f.id,
|
|
||||||
oi['object']['id'],
|
|
||||||
toot
|
|
||||||
)
|
|
||||||
)
|
)
|
||||||
pass
|
)
|
||||||
except:
|
except:
|
||||||
pass #ignore any toots that don't go into the DB
|
pass
|
||||||
# sys.exit(0)
|
|
||||||
r = requests.get(j['prev'])
|
else:
|
||||||
j = r.json()
|
while len(j['orderedItems']) > 0:
|
||||||
print('.', end='', flush=True)
|
for oi in j['orderedItems']:
|
||||||
|
if (not pleroma and oi['type'] == "Create") or (pleroma and oi['to']['type'] == "Create"):
|
||||||
|
# its a toost baby
|
||||||
|
content = oi['object']['content']
|
||||||
|
if oi['object']['summary'] != None:
|
||||||
|
#don't download CW'd toots
|
||||||
|
continue
|
||||||
|
toot = extract_toot(content)
|
||||||
|
# print(toot)
|
||||||
|
try:
|
||||||
|
pid = re.search(r"[^\/]+$", oi['object']['id']).group(0)
|
||||||
|
c.execute("REPLACE INTO toots (id, userid, uri, content) VALUES (?, ?, ?, ?)",
|
||||||
|
(pid,
|
||||||
|
f.id,
|
||||||
|
oi['object']['id'],
|
||||||
|
toot
|
||||||
|
)
|
||||||
|
)
|
||||||
|
pass
|
||||||
|
except:
|
||||||
|
pass #ignore any toots that don't go into the DB
|
||||||
|
# sys.exit(0)
|
||||||
|
r = requests.get(j['prev'])
|
||||||
|
j = r.json()
|
||||||
|
print('.', end='', flush=True)
|
||||||
print(" Done!")
|
print(" Done!")
|
||||||
db.commit()
|
db.commit()
|
||||||
except:
|
except:
|
||||||
|
|
Loading…
Reference in New Issue