# wardyns-feditools/fedisearch.py

# Import modules
import os
import html2text
from argparse import ArgumentParser
import json
import requests

# Directory containing this script; config.json is looked up next to it.
parent = os.path.dirname(os.path.realpath(__file__))
config_path = os.path.join(parent, 'config.json')

# Create the config on first run, then load it.
# NOTE(review): generate_config is neither defined nor imported in the part of
# this file that is visible here, so a missing config.json would end in a
# NameError unless the name is provided elsewhere -- confirm where it lives.
if not os.path.exists(config_path):
    generate_config("Wardyn's feditools", "read write follow push")
with open(config_path, 'r') as config_file:
    config = json.load(config_file)

# One shared HTTP session that sends the user's bearer token on every request.
session = requests.Session()
session.headers["Authorization"] = "Bearer " + config['user_token']

# Command-line interface: two optional boolean flags followed by the two
# required positionals (the account to search and the text to look for).
parser = ArgumentParser(description='Search a fedi users posts for a specific word or phrase')
parser.add_argument(
    '-c', '--case',
    action='store_true',
    help='Match string as case sensitive',
)
parser.add_argument(
    '-d', '--dms',
    action='store_true',
    help='Include DMs',
)
parser.add_argument('account', help='Account to search through')
parser.add_argument('pattern', help='Pattern to search for')

# Unpack the parsed values into the module-level names used by the main block.
args = parser.parse_args()
case, dms = args.case, args.dms
account, pattern = args.account, args.pattern

# Main block
#
# Resolves the requested account through the instance's search endpoint, then
# walks the account's statuses from newest to oldest, printing every
# non-reblog post whose text contains the pattern.

# A case-insensitive search compares lowercased copies of the pattern and of
# each post; the post itself is printed with its original casing.
if not case:
    pattern = pattern.lower()
oldest_status_id = None

# Convert the HTML post bodies to plain text without link targets or wrapping.
htmlconvert = html2text.HTML2Text()
htmlconvert.ignore_links = True
htmlconvert.body_width = 0

# Normalise the handle to "user@domain": drop a leading "@" and, when no domain
# was given, append the host taken from the configured instance URL.
if account.startswith('@'):
    account = account[1:]
if '@' not in account:
    # Assumes config['instance'] has the form "scheme://host[/...]", so that
    # element 2 of the "/"-split is the host.
    account = account + '@' + config['instance'].split('/')[2]

search_response = session.get(config['instance'] + '/api/v2/search', params={'q': account})
# Fail loudly on HTTP errors instead of indexing into an error payload.
search_response.raise_for_status()
accountlist = search_response.json()['accounts']

# NOTE(review): 'fqn' does not look like a core Mastodon Account field --
# presumably a Pleroma/Akkoma extension; confirm against the target server.
for curaccount in accountlist:
    print(curaccount['fqn'])
    if curaccount['fqn'].lower() == account.lower():
        account = curaccount
        break
else:
    # No search result matched the requested handle exactly.
    print('Could not find an account with the search term: ' + account)
    # Exit with a non-zero status so callers can detect the failure.
    raise SystemExit(1)

accid = account['id']
print('Searching for posts including "' + pattern + '" from user ' + account['fqn'])
print('\n---\n')

statuses_url = config['instance'] + '/api/v1/accounts/' + accid + '/statuses'
while True:
    # requests omits parameters whose value is None, so the first request
    # carries no max_id and starts from the newest status.
    response = session.get(statuses_url, params={'max_id': oldest_status_id, 'limit': 40})
    response.raise_for_status()
    statuses = response.json()
    # An empty page is the end of the timeline. Stopping on a merely short page
    # could end the search early if the server caps or filters the page size.
    if not statuses:
        break
    oldest_status_id = statuses[-1]['id']
    for status in statuses:
        # Reblogs are other people's posts; only the account's own are searched.
        if status['reblog'] is not None:
            continue
        if status['visibility'] == 'direct' and not dms:
            continue
        content = str(htmlconvert.handle(status['content']))
        haystack = content if case else content.lower()
        if pattern in haystack:
            print(content)
            # 'url' may be null for some statuses; fall back to the 'uri'.
            print('\nlink: ' + (status.get('url') or status.get('uri') or ''))
            print('\n---\n')
print('Finished searching')