forked from KayFaraday/pleroma-ebooks
add ability to ignore CWs
This commit is contained in:
parent
a904587b32
commit
71dbf59796
|
@ -6,6 +6,7 @@ This version makes quite a few changes from [the original](https://github.com/Je
|
||||||
- Non-Markov stuff
|
- Non-Markov stuff
|
||||||
- Stores toots in a sqlite database rather than a text file
|
- Stores toots in a sqlite database rather than a text file
|
||||||
- Doesn't unnecessarily redownload all toots every time
|
- Doesn't unnecessarily redownload all toots every time
|
||||||
|
- Ability to ignore specific CWs
|
||||||
|
|
||||||
## FediBooks
|
## FediBooks
|
||||||
Before you use mstdn-ebooks to create your own ebooks bot, I recommend checking out [FediBooks](https://fedibooks.com). Compared to mstdn-ebooks, FediBooks offers a few advantages:
|
Before you use mstdn-ebooks to create your own ebooks bot, I recommend checking out [FediBooks](https://fedibooks.com). Compared to mstdn-ebooks, FediBooks offers a few advantages:
|
||||||
|
@ -54,6 +55,7 @@ Configuring mstdn-ebooks is accomplished by editing `config.json`. If you want t
|
||||||
| cw | null | The content warning (aka subject) mstdn-ebooks will apply to non-error posts. |
|
| cw | null | The content warning (aka subject) mstdn-ebooks will apply to non-error posts. |
|
||||||
| instance_blacklist | ["bofa.lol", "witches.town", "knzk.me"] | If your bot is following someone from a blacklisted instance, it will skip over them and not download their posts. This is useful for ensuring that mstdn-ebooks doesn't waste time trying to download posts from dead instances, without you having to unfollow the user(s) from them. |
|
| instance_blacklist | ["bofa.lol", "witches.town", "knzk.me"] | If your bot is following someone from a blacklisted instance, it will skip over them and not download their posts. This is useful for ensuring that mstdn-ebooks doesn't waste time trying to download posts from dead instances, without you having to unfollow the user(s) from them. |
|
||||||
| learn_from_cw | false | If true, mstdn-ebooks will learn from CW'd posts. |
|
| learn_from_cw | false | If true, mstdn-ebooks will learn from CW'd posts. |
|
||||||
|
| ignored_cws | [] | If `learn_from_cw` is true, mstdn-ebooks will not learn from posts whose CW is in this list. |
|
||||||
| mention_handling | 1 | 0: Never use mentions. 1: Only generate fake mentions in the middle of posts, never at the start. 2: Use mentions as normal (old behaviour). |
|
| mention_handling | 1 | 0: Never use mentions. 1: Only generate fake mentions in the middle of posts, never at the start. 2: Use mentions as normal (old behaviour). |
|
||||||
| max_thread_length | 15 | The maximum number of bot posts in a thread before it stops replying. A thread can be 10 or 10000 posts long, but the bot will stop after it has posted `max_thread_length` times. |
|
| max_thread_length | 15 | The maximum number of bot posts in a thread before it stops replying. A thread can be 10 or 10000 posts long, but the bot will stop after it has posted `max_thread_length` times. |
|
||||||
| strip_paired_punctuation | false | If true, mstdn-ebooks will remove punctuation that commonly appears in pairs, like " and (). This avoids the issue of posts that open a bracket (or quote) without closing it. |
|
| strip_paired_punctuation | false | If true, mstdn-ebooks will remove punctuation that commonly appears in pairs, like " and (). This avoids the issue of posts that open a bracket (or quote) without closing it. |
|
||||||
|
|
|
@ -19,9 +19,10 @@ def make_sentence(output, cfg):
|
||||||
db.text_factory = str
|
db.text_factory = str
|
||||||
c = db.cursor()
|
c = db.cursor()
|
||||||
if cfg['learn_from_cw']:
|
if cfg['learn_from_cw']:
|
||||||
toots = c.execute("SELECT content FROM `toots` ORDER BY RANDOM() LIMIT 10000").fetchall()
|
ignored_cws_query_params = "(" + ",".join("?" * len(cfg["ignored_cws"])) + ")"
|
||||||
|
toots = c.execute(f"SELECT content FROM `toots` WHERE cw NOT IN {ignored_cws_query_params} ORDER BY RANDOM() LIMIT 10000", cfg["ignored_cws"]).fetchall()
|
||||||
else:
|
else:
|
||||||
toots = c.execute("SELECT content FROM `toots` WHERE cw = 0 ORDER BY RANDOM() LIMIT 10000").fetchall()
|
toots = c.execute("SELECT content FROM `toots` WHERE cw IS NULL ORDER BY RANDOM() LIMIT 10000").fetchall()
|
||||||
|
|
||||||
if len(toots) == 0:
|
if len(toots) == 0:
|
||||||
output.send("Database is empty! Try running main.py.")
|
output.send("Database is empty! Try running main.py.")
|
||||||
|
|
43
main.py
43
main.py
|
@ -31,7 +31,8 @@ cfg = {
|
||||||
"length_lower_limit": 5,
|
"length_lower_limit": 5,
|
||||||
"length_upper_limit": 50,
|
"length_upper_limit": 50,
|
||||||
"overlap_ratio_enabled": False,
|
"overlap_ratio_enabled": False,
|
||||||
"overlap_ratio": 0.7
|
"overlap_ratio": 0.7,
|
||||||
|
"ignored_cws": [],
|
||||||
}
|
}
|
||||||
|
|
||||||
try:
|
try:
|
||||||
|
@ -94,46 +95,10 @@ following = client.account_following(me.id)
|
||||||
db = sqlite3.connect("toots.db")
|
db = sqlite3.connect("toots.db")
|
||||||
db.text_factory = str
|
db.text_factory = str
|
||||||
c = db.cursor()
|
c = db.cursor()
|
||||||
c.execute("CREATE TABLE IF NOT EXISTS `toots` (sortid INTEGER UNIQUE PRIMARY KEY AUTOINCREMENT, id VARCHAR NOT NULL, cw INT NOT NULL DEFAULT 0, userid VARCHAR NOT NULL, uri VARCHAR NOT NULL, content VARCHAR NOT NULL)")
|
c.execute("CREATE TABLE IF NOT EXISTS `toots` (sortid INTEGER UNIQUE PRIMARY KEY AUTOINCREMENT, id VARCHAR NOT NULL, cw VARCHAR, userid VARCHAR NOT NULL, uri VARCHAR NOT NULL, content VARCHAR NOT NULL)")
|
||||||
c.execute("CREATE TRIGGER IF NOT EXISTS `dedup` AFTER INSERT ON toots FOR EACH ROW BEGIN DELETE FROM toots WHERE rowid NOT IN (SELECT MIN(sortid) FROM toots GROUP BY uri ); END; ")
|
c.execute("CREATE TRIGGER IF NOT EXISTS `dedup` AFTER INSERT ON toots FOR EACH ROW BEGIN DELETE FROM toots WHERE rowid NOT IN (SELECT MIN(sortid) FROM toots GROUP BY uri ); END; ")
|
||||||
db.commit()
|
db.commit()
|
||||||
|
|
||||||
tableinfo = c.execute("PRAGMA table_info(`toots`)").fetchall()
|
|
||||||
found = False
|
|
||||||
columns = []
|
|
||||||
for entry in tableinfo:
|
|
||||||
if entry[1] == "sortid":
|
|
||||||
found = True
|
|
||||||
break
|
|
||||||
columns.append(entry[1])
|
|
||||||
|
|
||||||
if not found:
|
|
||||||
print("Migrating to new database format. Please wait...")
|
|
||||||
print("WARNING: If any of the accounts your bot is following are Pleroma users, please delete toots.db and run main.py again to create it anew.")
|
|
||||||
try:
|
|
||||||
c.execute("DROP TABLE `toots_temp`")
|
|
||||||
except:
|
|
||||||
pass
|
|
||||||
|
|
||||||
c.execute("CREATE TABLE `toots_temp` (sortid INTEGER UNIQUE PRIMARY KEY AUTOINCREMENT, id VARCHAR NOT NULL, cw INT NOT NULL DEFAULT 0, userid VARCHAR NOT NULL, uri VARCHAR NOT NULL, content VARCHAR NOT NULL)")
|
|
||||||
for f in following:
|
|
||||||
user_toots = c.execute("SELECT * FROM `toots` WHERE userid LIKE ? ORDER BY id", (f.id,)).fetchall()
|
|
||||||
if user_toots is None:
|
|
||||||
continue
|
|
||||||
|
|
||||||
if columns[-1] == "cw":
|
|
||||||
for toot in user_toots:
|
|
||||||
c.execute("INSERT INTO `toots_temp` (id, userid, uri, content, cw) VALUES (?, ?, ?, ?, ?)", toot)
|
|
||||||
else:
|
|
||||||
for toot in user_toots:
|
|
||||||
c.execute("INSERT INTO `toots_temp` (id, cw, userid, uri, content) VALUES (?, ?, ?, ?, ?)", toot)
|
|
||||||
|
|
||||||
c.execute("DROP TABLE `toots`")
|
|
||||||
c.execute("ALTER TABLE `toots_temp` RENAME TO `toots`")
|
|
||||||
c.execute("CREATE TRIGGER IF NOT EXISTS `dedup` AFTER INSERT ON toots FOR EACH ROW BEGIN DELETE FROM toots WHERE rowid NOT IN (SELECT MIN(sortid) FROM toots GROUP BY uri ); END; ")
|
|
||||||
|
|
||||||
db.commit()
|
|
||||||
|
|
||||||
|
|
||||||
def handleCtrlC(signal, frame):
|
def handleCtrlC(signal, frame):
|
||||||
print("\nPREMATURE EVACUATION - Saving chunks")
|
print("\nPREMATURE EVACUATION - Saving chunks")
|
||||||
|
@ -155,7 +120,7 @@ def insert_toot(oii, acc, post, cursor): # extracted to prevent duplication
|
||||||
pid = patterns["pid"].search(oii['object']['id']).group(0)
|
pid = patterns["pid"].search(oii['object']['id']).group(0)
|
||||||
cursor.execute("REPLACE INTO toots (id, cw, userid, uri, content) VALUES (?, ?, ?, ?, ?)", (
|
cursor.execute("REPLACE INTO toots (id, cw, userid, uri, content) VALUES (?, ?, ?, ?, ?)", (
|
||||||
pid,
|
pid,
|
||||||
1 if (oii['object']['summary'] is not None and oii['object']['summary'] != "") else 0,
|
oii['object']['summary'] or None,
|
||||||
acc.id,
|
acc.id,
|
||||||
oii['object']['id'],
|
oii['object']['id'],
|
||||||
post
|
post
|
||||||
|
|
Loading…
Reference in New Issue