implements("bogofilter");

% Bogofilter integration for slrn.
%
% To use, add this line to the end of your score file:
%
%    include bogospam.score
%
% You may have to create that file if it doesn't exist, i.e. touch bogospam.score.
%
% Bogofilter is automatically trained when you leave a group.
%
%   Any message that you have read and that has a non-negative score (zero or
%   higher) will be learned as ham (non-spam).
%
%   Any message that you have read, that has a negative score and that carries
%   the low-score flag is trained as spam.
%
% If you read a message that is interesting but has a low score, hit ^H.
% If you read a message that is spam but has a high score, hit ^S.
%
% These keys just adjust the score; all the training happens on leaving the
% group.
%
% If you want to only use bogofilter for certain servers, you can do this:
%
% #ifn$NNTPSERVER news://nospam.safeserver.com
%  interpret "bogofilter.sl"
% #endif
%
% TODO: is it better to set the header flag, rather than adjust score?
% TODO: offer user a list to unselect spams, e.g. get_select_box_response
%

% The default score adjustment you want to apply when bogofilter thinks a
% message is spam.  Keyed by newsgroup name; groups without an explicit entry
% get the default of -99.
private variable bogoscore = Assoc_Type[Int_Type, -99];

% The default score adjustment you want to apply when bogofilter thinks a
% message is ham.  Keyed by newsgroup name; groups without an explicit entry
% get the default of +35.
private variable bogohamscore = Assoc_Type[Int_Type, +35];

% You can override that number per group, e.g.
bogoscore["uk.legal"] = -100; % Spammy Group

% You can disable it for groups that are moderated.
bogoscore["comp.taxes.moderated"] = 0; % Safe Group

% Set to a non-zero value to be asked for confirmation before training.
% Set to zero if you don't want to confirm training, just do it.
private variable confirmtrain = 0;

% Default keybindings

%% The current article is spam: drop its score to one below slrn's
%% "max_low_score" variable.
definekey("set_header_score(get_variable_value(\"max_low_score\") - 1)", "^s", "article");

%% Reset current article score (to zero), so that it will be learned as ham.
definekey("set_header_score(0)", "^h", "article");

%% Optional, if you make a mistake you can undo a learning.
%definekey("bogofilter->learn_false_ham", "^t", "article");
%definekey("bogofilter->learn_false_spam", "^f", "article");

% Optional, you can add bindings like this to fine tune a message's score.
%definekey("set_header_score(get_header_score()-1)", "-", "article");
%definekey("set_header_score(get_header_score()+1)", "+", "article");

% The name of the scorefile.
private variable scorefile = "bogospam.score";

% Return a header that's safe to use as a regexp in a score file.
%
%   hdr: name of the article header to fetch (e.g. "Message-ID").
%
% Returns the header value with regexp metacharacters backslash-quoted,
% shortened from the right until the quoted form is at most 90 characters.
private define sanitized_article_header (hdr)
{
    variable val = extract_article_header (hdr);
    variable len = strlen (val);
    variable quoted;

    % bogofilter will truncate this if it's over 90 chars, but we can't cap it
    % at the worst case (45), that's too tiny.  Keep subtracting characters
    % until it fits.
    do
    {
        quoted = str_quote_string (substr (val, 1, len), "()$\\[].^*+?", '\\');
        len--;
    }
    while (strlen (quoted) > 90);

    return quoted;
}

% Create a fake mbox entry from the current article headers.
%
% Returns a string holding an mbox "From " separator line followed by the
% From, Subject, Xref and Message-ID headers and a blank line.  The
% Message-ID is passed through sanitized_article_header(); the other headers
% are used verbatim.
private define bogofilter_get_headers ()
{
    return sprintf ("From slrn\n"
                    + "From: %s\n"
                    + "Subject: %s\n"
                    + "Xref: %s\n"
                    + "Message-ID: %s\n\n",
                    extract_article_header ("From"),
                    extract_article_header ("Subject"),
                    extract_article_header ("Xref"),
                    sanitized_article_header ("Message-ID"));
}

% Pipe the current article's fake mbox entry into "bogofilter <flags>".
%
%   flags: command line switches handed to bogofilter (e.g. "-sM").
%
% Shared by the learn_* functions below so that the pipe handling lives in
% one place.
private define train_current_article (flags)
{
    variable cmd = "bogofilter " + flags;
    variable pipe = popen (cmd, "w");

    % popen returns NULL when the command could not be started.  Writing to
    % NULL would raise an S-Lang error and abort the calling hook, so report
    % the problem and carry on instead.
    if (pipe == NULL)
    {
        message ("bogofilter: unable to run '" + cmd + "'");
        return;
    }

    () = fwrite (bogofilter_get_headers (), pipe);
    () = pclose (pipe);
}

% Learn this message as spam.
private define learn_article_spam ()
{
    train_current_article ("-sM");
}

% Learn this message as ham.
private define learn_article_ham ()
{
    train_current_article ("-nM");
}

% This message was incorrectly learnt as ham (non-spam).
static define learn_false_ham ()
{
    train_current_article ("-sNM");
}

% This message was incorrectly learnt as spam.
% Feeds the current article's headers to "bogofilter -nSM" so that an article
% wrongly trained as spam is re-trained as ham.
static define learn_false_spam ()
{
    variable pipe = popen ("bogofilter -nSM", "w");

    % popen returns NULL when the command could not be started; writing to
    % NULL would raise an S-Lang error.
    if (pipe == NULL)
    {
        message ("bogofilter: unable to run 'bogofilter -nSM'");
        return;
    }

    () = fwrite (bogofilter_get_headers (), pipe);
    () = pclose (pipe);
}

% This is used to keep track of which messages were unseen on entering a group.
% Starts out as an empty list so the quit hook always has a list to iterate,
% even if the group-entry hook never got as far as filling it in.
private variable unreadmsgs = {};

% Train bogofilter on a list of articles, asking first when confirmtrain is set.
%
%   msgids:  list of Message-IDs to train on.
%   prompt:  sprintf format for the confirmation question; receives the
%            number of messages as its only argument.
%   learner: reference to the function that trains on the current article.
private define confirm_and_learn (msgids, prompt, learner)
{
    variable msgid;

    % Nothing to learn, nothing to ask.
    if (length (msgids) == 0)
        return;

    if (confirmtrain)
    {
        % Only an explicit "yes" (1) goes ahead with the training.
        if (get_yes_no_cancel (sprintf (prompt, length (msgids))) != 1)
            return;
    }

    % Send each one to bogofilter.  The learner works on the current article,
    % so jump to each message first and skip those that cannot be found.
    foreach msgid (msgids)
    {
        if (locate_header_by_msgid (msgid, 0))
            (@learner) ();
    }
}

% Classify messages on leaving groups.
%
% Walks the Message-IDs recorded in unreadmsgs.  Articles that have been read
% and have a score of zero or more are learned as ham; read articles with a
% negative score and the low-score flag are learned as spam.  Everything else
% is left alone.
static define classify_group_scores ()
{
    variable hammsgs = {};
    variable spammsgs = {};
    variable msgid;
    variable flags;

    % Make sure all messages are expanded.
    uncollapse_threads ();

    foreach msgid (unreadmsgs)
    {
        % Jump to this message; skip it if it is no longer there.
        if (0 == locate_header_by_msgid (msgid, 0))
            continue;

        flags = get_header_flags ();

        % Only read messages are considered.
        if (0 == (flags & HEADER_READ))
            continue;

        if (get_header_score () >= 0)
            list_append (hammsgs, msgid);
        else if (flags & HEADER_LOW_SCORE)
            list_append (spammsgs, msgid);
    }

    confirm_and_learn (hammsgs, "Learn %u read message(s) as non-spam",
                       &learn_article_ham);
    confirm_and_learn (spammsgs, "Learn %u low-score message(s) as spam",
                       &learn_article_spam);
}

% Build the path of the bogofilter scorefile.
%
% Returns the private "scorefile" name placed in the directory of slrn's own
% "scorefile" setting, with that directory run through make_home_filename().
private define get_scorefile_name ()
{
    variable slrnscore = get_variable_value ("scorefile");
    variable slrnscoredir = path_dirname (slrnscore);

    % We will put our scorefile next to the slrn scorefile.
    return sprintf ("%s/%s", make_home_filename (slrnscoredir), scorefile);
}

% This is called when we enter a group to classify the messages.
% Pipes the headers of every unread article in the group through bogofilter,
% has awk turn the verdicts into scorefile entries, and reloads the scorefile
% so the scores take effect.  The Message-IDs that were looked at are kept in
% unreadmsgs for the training pass that runs when the group is left.
static define bogofilter_process_group ()
{
    variable group = current_newsgroup ();
    variable filename = get_scorefile_name ();
    variable spamhdr, hamhdr;
    variable awkspam, awkham;
    variable cmd;
    variable fp;
    variable bogofilter;
    variable flags;

    % How bogofilter should format the result.
    variable bogoformat = "Message-ID: %I %% %c";

    % Scorefile section headers for this group.  The doubled backslash keeps
    % "\n" literal here so that it is awk that turns it into a newline.
    spamhdr = sprintf ("[%s]\\nScore: %d %% Bogofilter\\n",
                       group, bogoscore[group]);
    hamhdr = sprintf ("[%s]\\nScore: +%d %% Bogofilter\\n",
                      group, bogohamscore[group]);

    % Awk scripts to sort results: lines ending in "% Spam" or "% Ham" are
    % appended to the scorefile under the matching section header.
    awkspam = sprintf ("/%% Spam$/ { print \"%s\",$1,$2 >> \"%s\" }",
                       spamhdr, filename);
    awkham = sprintf ("/%% Ham$/ { print \"%s\",$1,$2 >> \"%s\" }",
                      hamhdr, filename);

    % Start from a clean slate before anything can fail, so the quit hook
    % never trains on the previous group's list.
    unreadmsgs = {};

    % Truncate any existing file.  fopen returns NULL on failure and closing
    % NULL would raise an S-Lang error, so check first.
    fp = fopen (filename, "w+");
    if (fp == NULL)
    {
        message (sprintf ("bogofilter: unable to write %s", filename));
        return;
    }
    () = fclose (fp);

    % Generate a scorefile from bogofilter output.
    cmd = sprintf ("bogofilter -v -M --header-format='%s' 2> /dev/null | awk '%s%s'",
                   bogoformat, awkspam, awkham);

    bogofilter = popen (cmd, "w");
    if (bogofilter == NULL)
    {
        message ("bogofilter: unable to start bogofilter");
        return;
    }

    % Walk the header list from the current article downwards.
    do
    {
        flags = get_header_flags ();

        if (0 == (flags & HEADER_READ))
        {
            % Send to bogofilter.
            () = fwrite (bogofilter_get_headers (), bogofilter);

            % Record this message-id.
            list_append (unreadmsgs, extract_article_header ("Message-ID"));
        }
        else if (flags & HEADER_LOW_SCORE)
        {
            % The problem is that HEADER_LOW_SCORE implies HEADER_READ, so we
            % will never learn them.  This is probably the wrong solution.
            list_append (unreadmsgs, extract_article_header ("Message-ID"));
        }
    }
    while (header_down (1) == 1);

    () = pclose (bogofilter);

    % Now apply those scores.
    if (length (unreadmsgs))
        reload_scorefile (1);
}

% Startup hook: prints bogofilter's message counts.
static define prepare_startup ()
{
    % NOTE(review): presumably this clears the message line before the stats
    % are printed -- confirm.
    message ("");

    % Print some stats.  system() returns the exit status; discard it
    % explicitly so it is not left behind on the S-Lang stack.
    () = system ("bogoutil -w ~/.bogofilter .MSG_COUNT");
}

() = register_hook ("article_mode_hook", "bogofilter->bogofilter_process_group");
() = register_hook ("article_mode_quit_hook", "bogofilter->classify_group_scores");
() = register_hook ("startup_hook", "bogofilter->prepare_startup");