# wardyns-feditools/fedisearch.py
#
# 89 lines
# 2.5 KiB
# Python

# Import modules
import os
import html2text
from argparse import ArgumentParser
import json
import requests
# Locate config.json in the same directory as this script; realpath resolves
# symlinks so the lookup follows the real file location.
parent = os.path.dirname(os.path.realpath(__file__))
config_path = os.path.join(parent, 'config.json')
if not os.path.exists(config_path):
    # NOTE(review): generate_config is neither defined nor imported in the
    # file as shown, so this branch raises NameError unless the name is
    # provided elsewhere -- confirm where it is meant to come from.
    generate_config("Wardyn's feditools", "read write follow push")
with open(config_path, 'r') as config_file:
    config = json.load(config_file)
# A single shared session so every request carries the user's bearer token.
session = requests.Session()
session.headers["Authorization"] = "Bearer " + config['user_token']
# Initialize arguments
# Two optional boolean flags followed by two required positionals.
parser = ArgumentParser(description='Search a fedi users posts for a specific word or phrase')
# store_true is argparse's shorthand for store_const with const=True and
# default=False; the destination name is derived from the long option.
parser.add_argument('-c', '--case', action='store_true', help='Match string as case sensitive')
parser.add_argument('-d', '--dms', action='store_true', help='Include DMs')
# Positionals are kept as plain strings (argparse's default conversion).
parser.add_argument('account', help='Account to search through')
parser.add_argument('pattern', help='Pattern to search for')
args = parser.parse_args()
# Unpack into module-level names used by the rest of the script.
case, dms = args.case, args.dms
account, pattern = args.account, args.pattern
# Main block
# For a case-insensitive search the pattern is lowercased here and each
# status body is lowercased before comparison later on.
if not case:
    pattern = pattern.lower()
# No max_id yet: the first statuses request starts from the newest post.
oldest_status_id = None
# Converter used to turn each status's HTML content into text for matching.
htmlconvert = html2text.HTML2Text()
htmlconvert.ignore_links = True
htmlconvert.body_width = 0
# Accept both "@user@host" and "user@host". startswith (rather than
# account[0]) avoids an IndexError when an empty string is passed.
if account.startswith('@'):
    account = account[1:]
# A bare username gets the configured instance's host appended.
# Assumes config['instance'] is a full URL such as "https://host" so that
# element 2 of the '/'-split is the host -- TODO confirm against the config.
if '@' not in account:
    account = account + '@' + config['instance'].split('/')[2]
# Query the instance's search endpoint and pick the result whose fully
# qualified name matches the requested account (case-insensitively).
search_response = session.get(config['instance'] + '/api/v2/search', params={'q': account})
# Fail with a clear HTTP error (e.g. bad token) instead of a KeyError on
# the missing 'accounts' key of an error payload.
search_response.raise_for_status()
accountlist = search_response.json()['accounts']
for candidate in accountlist:
    # Each candidate's name is echoed as it is examined.
    print(candidate['fqn'])
    if candidate['fqn'].lower() == account.lower():
        # From here on `account` is the account object, not the search string.
        account = candidate
        break
else:
    # Loop finished without a match: `account` is still the search string.
    print('Could not find an account with the search term: ' + account)
    # SystemExit is always available (the site-provided exit() is not) and a
    # non-zero status signals the failure to the calling shell.
    raise SystemExit(1)
accid = account['id']
print('Searching for posts including "' + pattern + '" from user ' + account['fqn'])
print('\n---\n')
# Walk the account's statuses from newest to oldest, one page at a time,
# printing every non-reblog status whose text contains the pattern.
page_size = 40
statuses_url = config['instance'] + '/api/v1/accounts/' + accid + '/statuses'
while True:
    # max_id is None on the first request and is then left out of the query
    # string by requests; afterwards it is the id of the oldest status seen.
    response = session.get(statuses_url, params={'max_id': oldest_status_id, 'limit': page_size})
    # Surface HTTP errors directly rather than indexing an error payload.
    response.raise_for_status()
    statuses = response.json()
    # An empty page means there is nothing (more) to read. This happens for
    # accounts with no posts and when the total is an exact multiple of the
    # page size; without this guard statuses[-1] raises IndexError.
    if not statuses:
        break
    oldest_status_id = statuses[-1]['id']
    for status in statuses:
        # Reblogs are skipped; only the account's own posts are searched.
        if status['reblog'] is not None:
            continue
        # Direct messages are only searched when -d/--dms was given.
        if status['visibility'] == 'direct' and not dms:
            continue
        content = htmlconvert.handle(status['content'])
        if not case:
            content = content.lower()
        if pattern in content:
            print(content)
            print('\nlink: ' + status['url'])
            print('\n---\n')
    # A short page is treated as the last one.
    if len(statuses) < page_size:
        break
print('Finished searching')