"""leimonfold - A command-line tool to update threads in a maildir using lei & lore.org/all/""" from datetime import datetime from datetime import timedelta from datetime import timezone from email.headerregistry import DateHeader from mailbox import Maildir from subprocess import run as subp_run from sys import argv as sys_argv __version__ = "0.0.1-prealpha" __author__ = "Thorsten Leemhuis" __email__ = "linux@leemhuis.info" # todo: # * error handling # * support for formats other then maildir? def find_thread_heads(maildir_path): thread_heads = {} thread_members = {} maildir = Maildir(maildir_path, create=False) def clear_msgid(msgid): return msgid[1:-1] def parse_date(msg): kwds = {} DateHeader.parse(msg['Date'], kwds) return kwds['datetime'] def in_reply_to_known(in_reply_to): for i in in_reply_to: msgid = clear_msgid(i) if msgid in thread_heads or msgid in thread_members: return msgid return False # walk messages and store heads directly while recording thread members for msg in maildir: msgid = clear_msgid(msg['Message-ID']) date = parse_date(msg) if not msg['In-reply-to'] and not msg['References']: # print('head found: %s' % msg['Subject']) thread_heads[msgid] = date continue for i in ('References', 'In-reply-to'): if i in msg: thread_members[msgid] = msg[i].split() # hack: insert date into the first spot of the list; updated # later to contain the date of the latest posting in the thread thread_members[msgid].insert(0, date) break # detect if the head of all the replies was found in case a msg from the # middle of a thread not yet tracked was added to the maildir for msgid in thread_members.keys(): # reminder: there is a hack here, the date of the msg is stored in the first spot of the list upper_msgid = in_reply_to_known(thread_members[msgid][1:]) if not upper_msgid: # partial thread, so add it to the list of heads so it gets downloaded # print('partial thread found and added: %s' % msgid) thread_heads[msgid] = thread_members[msgid][0] else: if thread_heads[upper_msgid] > thread_members[msgid][0]: thread_heads[upper_msgid] = thread_members[msgid][0] return thread_heads def run_lei_query(maildir_path, mid): cmd = ['lei', 'q', '-I', 'https://lore.kernel.org/all/', '--augment', '-o', 'maildir:%s' % maildir_path, '--threads', '--dedupe=mid', mid] result = subp_run(cmd, capture_output=True) return result maildir_path = sys_argv[1] thread_heads = find_thread_heads(maildir_path) today = datetime.now(tz=timezone.utc) for msgid in thread_heads.keys(): delta = today - thread_heads[msgid] if delta.days > 90: # ignore threads older than 90 days continue result = run_lei_query(maildir_path, 'mid:%s rt:1.year.ago..' % msgid) # print(msgid, result.returncode, result.stdout, result.stderr)