% Put this file's definitions into the "bogofilter" namespace. The hooks
% registered at the end of the file refer to the static functions through
% that name (e.g. "bogofilter->classify_group_scores").
implements("bogofilter");

% To use, add this line to the end of your score file:
% include bogospam.score
%
% You may have to create that file if it doesn't exist, e.g. with `touch bogospam.score`.
%
% Bogofilter is automatically trained when you leave a group.
%
% Any message that you have read and that has a non-negative score will be learned as ham (non-spam).
%
% Any message that is marked as low_score is trained as spam.
%
% If you read a message that is interesting but has a low score, hit ^H.
% If you read a message that is spam but has a high score, hit ^S.
%
% These keys just adjust the score, all the training happens on leaving the
% group.
%
% If you want to only use bogofilter for certain servers, you can do this:
%
% #ifn$NNTPSERVER news://nospam.safeserver.com
% interpret "bogofilter.sl"
% #endif
%
% TODO: is it better to set the header flag, rather than adjust score?
% TODO: offer user a list to unselect spams, e.g. get_select_box_response
%

% Score adjustment written to the score file when bogofilter classifies a
% message as spam. Groups without an explicit entry use the default (-99).
private variable bogoscore = Assoc_Type[Int_Type, -99];

% Score adjustment written to the score file when bogofilter classifies a
% message as ham. Groups without an explicit entry use the default (+35).
private variable bogohamscore = Assoc_Type[Int_Type, +35];

% You can override the spam adjustment per group, e.g.
bogoscore["uk.legal"] = -100; % Spammy Group

% You can disable it for groups that are moderated.
bogoscore["comp.taxes.moderated"] = 0; % Safe Group

% Set to zero if you don't want to confirm training, just do it. Any
% non-zero value asks for confirmation before each batch is learned.
private variable confirmtrain = 0;

% Default keybindings
%% The current article is spam: push its score just below max_low_score.
definekey("set_header_score(get_variable_value(\"max_low_score\") - 1)", "^s", "article");

%% Reset current article score, so that it will be learned as ham.
definekey("set_header_score(0)", "^h", "article");

%% Optional, if you make a mistake you can undo a learning.
%definekey("bogofilter->learn_false_ham", "^t", "article");
%definekey("bogofilter->learn_false_spam", "^f", "article");

% Optional, you can add bindings like this to fine-tune a message's score.
%definekey("set_header_score(get_header_score()-1)", "<Esc>-", "article");
%definekey("set_header_score(get_header_score()+1)", "<Esc>+", "article");

% The name of the score file generated by this macro; it is created in the
% same directory as slrn's own scorefile.
private variable scorefile = "bogospam.score";

% Return the value of header `hdr` of the current article, escaped so that
% it can be used as a regular expression in a score file.
%
% Regexp metacharacters are backslash-quoted. Because bogofilter truncates
% the value when it is longer than 90 characters, the raw header is
% shortened one character at a time until the quoted form fits; capping at
% the worst case (45 raw characters) up front would be too small.
private define sanitized_article_header(hdr)
{
    variable raw = extract_article_header(hdr);
    variable keep = strlen(raw);
    variable escaped;

    % First attempt uses the whole header value.
    escaped = str_quote_string(substr(raw, 1, keep), "()$\\[].^*+?", '\\');

    % Drop trailing characters until the quoted result is short enough.
    while (strlen(escaped) > 90) {
        keep--;
        escaped = str_quote_string(substr(raw, 1, keep), "()$\\[].^*+?", '\\');
    }

    return escaped;
}

% Build a minimal mbox entry from the headers of the current article.
%
% The entry consists of an mbox "From " separator line followed by the
% From, Subject, Xref and (regexp-quoted) Message-ID headers and a blank
% line. Returns the entry as a single string.
private define bogofilter_get_headers()
{
    variable sender = extract_article_header("From");
    variable subject = extract_article_header("Subject");
    variable xref = extract_article_header("Xref");
    variable msgid = sanitized_article_header("Message-ID");

    return sprintf("From slrn\nFrom: %s\nSubject: %s\nXref: %s\nMessage-ID: %s\n\n",
                   sender, subject, xref, msgid);
}

% Pipe the fake mbox entry of the current article to bogofilter, invoked
% with the given option string (e.g. "-sM").
%
% popen() returns NULL when the command cannot be started; in that case a
% message is shown and nothing is written, instead of passing NULL on to
% fwrite() and raising an error in the middle of a hook or key binding.
private define train_on_current_article(flags)
{
    variable cmd = "bogofilter " + flags;
    variable fp = popen(cmd, "w");

    if (fp == NULL) {
        message(sprintf("bogofilter: unable to run '%s'", cmd));
        return;
    }

    () = fwrite(bogofilter_get_headers(), fp);
    () = pclose(fp);
}

% Learn this message as spam.
private define learn_article_spam ()
{
    train_on_current_article("-sM");
}

% Learn this message as ham.
private define learn_article_ham ()
{
    train_on_current_article("-nM");
}

% This message was incorrectly learnt as ham (non-spam).
static define learn_false_ham ()
{
    train_on_current_article("-sNM");
}

% This message was incorrectly learnt as spam.
static define learn_false_spam ()
{
    train_on_current_article("-nSM");
}

% List of Message-IDs recorded by bogofilter_process_group when a group is
% entered: every article that was unread at that point, plus articles that
% were already flagged as low score. classify_group_scores walks this list
% when the group is left to decide what to train on.
private variable unreadmsgs;

% Hook run when leaving a group: train bogofilter on what the user did.
%
% Every Message-ID recorded in unreadmsgs is revisited. Articles that are
% now marked read are sorted into two batches: those with a score of zero
% or more are learned as ham, those carrying the low-score flag are learned
% as spam. When confirmtrain is non-zero each batch is confirmed first.
static define classify_group_scores ()
{
    variable ham = {};
    variable spam = {};
    variable msgid;
    variable flags;
    variable prompt;

    % Headers hidden inside collapsed threads must be reachable.
    uncollapse_threads();

    % Pass 1: sort the recorded articles into ham and spam batches.
    foreach msgid (unreadmsgs) {
        % Skip articles that can no longer be located.
        if (not locate_header_by_msgid(msgid, 0))
            continue;

        flags = get_header_flags();

        % Articles the user never read tell us nothing.
        if (not (flags & HEADER_READ))
            continue;

        if (get_header_score() >= 0)
            list_append(ham, msgid);
        else if (flags & HEADER_LOW_SCORE)
            list_append(spam, msgid);
    }

    % Pass 2: feed the ham batch to bogofilter.
    if (length(ham)) {
        prompt = sprintf("Learn %u read message(s) as non-spam", length(ham));
        if (not confirmtrain || get_yes_no_cancel(prompt) == 1) {
            foreach msgid (ham) {
                if (locate_header_by_msgid(msgid, 0))
                    learn_article_ham();
            }
        }
    }

    % Pass 3: feed the spam batch to bogofilter.
    if (length(spam)) {
        prompt = sprintf("Learn %u low-score message(s) as spam", length(spam));
        if (not confirmtrain || get_yes_no_cancel(prompt) == 1) {
            foreach msgid (spam) {
                if (locate_header_by_msgid(msgid, 0))
                    learn_article_spam();
            }
        }
    }
}

% Return the path of the generated score file. It is placed in the same
% directory as slrn's configured scorefile, resolved via
% make_home_filename.
private define get_scorefile_name()
{
    variable dir = path_dirname(get_variable_value("scorefile"));

    return make_home_filename(dir) + "/" + scorefile;
}

% Hook run when a group is entered: classify the unread articles.
%
% The headers of every unread article are piped through bogofilter; an awk
% script turns each verdict into a score file entry for the current group
% (using bogoscore for spam and bogohamscore for ham). The score file is
% truncated first and reloaded afterwards so the new scores take effect.
% The Message-IDs of the articles considered are stored in unreadmsgs for
% classify_group_scores.
%
% If the score file cannot be opened or the pipeline cannot be started, a
% message is shown and the group is left unclassified.
static define bogofilter_process_group ()
{
    variable bogofilter;
    variable cmd;
    variable fp;
    variable flags;
    variable spamhdr;
    variable hamhdr;
    variable awkspam;
    variable awkham;
    variable group = current_newsgroup();

    % How bogofilter should format the result.
    variable bogoformat = "Message-ID: %I %% %c";

    % The name of the scorefile.
    variable filename = get_scorefile_name();

    % Reset up front, so that an early return below never leaves the list
    % of a previously visited group behind for the quit hook.
    unreadmsgs = {};

    % Awk scripts to sort results.
    spamhdr = sprintf("[%s]\\nScore: %d %% Bogofilter\\n",
                      group, bogoscore[group]);
    hamhdr = sprintf("[%s]\\nScore: +%d %% Bogofilter\\n",
                     group, bogohamscore[group]);

    awkspam = sprintf("/%% Spam$/ { print \"%s\",$1,$2 >> \"%s\" }", spamhdr, filename);
    awkham = sprintf("/%% Ham$/ { print \"%s\",$1,$2 >> \"%s\" }", hamhdr, filename);

    % Truncate any existing file. fopen returns NULL on failure, which
    % must not be handed to fclose.
    fp = fopen(filename, "w+");
    if (fp == NULL) {
        message(sprintf("bogofilter: cannot write score file %s", filename));
        return;
    }
    () = fclose(fp);

    % Generate a scorefile from bogofilter output.
    cmd = sprintf("bogofilter -v -M --header-format='%s' 2> /dev/null | awk '%s%s'",
                  bogoformat,
                  awkspam,
                  awkham);

    bogofilter = popen(cmd, "w");
    if (bogofilter == NULL) {
        message("bogofilter: unable to start the bogofilter pipeline");
        return;
    }

    do {
        flags = get_header_flags();

        if (not (flags & HEADER_READ)) {
            % Send to bogofilter.
            () = fwrite(bogofilter_get_headers(), bogofilter);

            % Record this message-id.
            list_append(unreadmsgs, extract_article_header("Message-ID"));
        } else if (flags & HEADER_LOW_SCORE) {
            % The problem is that HEADER_LOW_SCORE implies HEADER_READ, so we
            % would never learn them. Recording them here lets the quit hook
            % see them; this is probably the wrong solution.
            list_append(unreadmsgs, extract_article_header("Message-ID"));
        }
    } while (header_down(1) == 1);

    () = pclose(bogofilter);

    % Now apply those scores.
    if (length(unreadmsgs)) {
        reload_scorefile(1);
    }
}

% Startup hook: clear the message line and print the message-count
% statistics of the bogofilter database in ~/.bogofilter.
static define prepare_startup ()
{
    message("");
    % Print some stats. system() returns the command's exit status, which
    % has to be consumed so that it is not left on the S-Lang stack.
    () = system("bogoutil -w ~/.bogofilter .MSG_COUNT");
}

% Wire the functions above into slrn:
%  - classify articles when a group is entered,
%  - train bogofilter when the group is left,
%  - print database statistics at startup.
% The handlers are static, so they are named through the "bogofilter"
% namespace declared at the top of the file.
() = register_hook ("article_mode_hook", "bogofilter->bogofilter_process_group");
() = register_hook ("article_mode_quit_hook", "bogofilter->classify_group_scores");
() = register_hook ("startup_hook", "bogofilter->prepare_startup");