#
# $Id: filter_innd.py,v 1.2 1999/09/23 14:24:23 kondou Exp $
#
# This is a sample filter for the Python innd hook.
#
# For details, see the file README.python_hook that came with INN.
#

import re
from string import *

# Pre-interned header names.  innd interns the header keys it hands to
# the filter as well, so looking headers up through these constants
# should let dictionary access skip some string comparisons under the
# covers.
#
# intern() is a builtin on Python 2 but lives in the sys module on
# Python 3; fall back to sys.intern when the builtin is missing so this
# module can be loaded by either interpreter.
try:
    intern
except NameError:
    from sys import intern

Approved = intern("Approved")
Control = intern("Control")
Date = intern("Date")
Distribution = intern("Distribution")
Expires = intern("Expires")
From = intern("From")
Lines = intern("Lines")
Message_ID = intern("Message-ID")
Newsgroups = intern("Newsgroups")
Path = intern("Path")
Reply_To = intern("Reply-To")
Sender = intern("Sender")
Subject = intern("Subject")
Supersedes = intern("Supersedes")
Bytes = intern("Bytes")
Also_Control = intern("Also-Control")
References = intern("References")
Xref = intern("Xref")
Keywords = intern("Keywords")
X_Trace = intern("X-Trace")
NNTP_Posting_Host = intern("NNTP-Posting-Host")
Followup_To = intern("Followup-To")
Organization = intern("Organization")
Content_Type = intern("Content-Type")
Content_Base = intern("Content-Base")
Content_Disposition = intern("Content-Disposition")
X_Newsreader = intern("X-Newsreader")
X_Mailer = intern("X-Mailer")
X_Newsposter = intern("X-Newsposter")
X_Cancelled_By = intern("X-Cancelled-By")
X_Canceled_By = intern("X-Canceled-By")
Cancel_Key = intern("Cancel-Key")
# Pseudo-headers: the article body and innd's own line count.
__BODY__ = intern("__BODY__")
__LINES__ = intern("__LINES__")


class InndFilter:
    """Provide filtering callbacks to innd.

    An instance of this class is handed to set_filter_hook() at the
    bottom of this file; the filter_* methods are the callbacks.
    """

    def __init__(self):
        """This runs every time the filter is loaded or reloaded.

        This is a good place to initialize variables and precompile
        regular expressions, or maybe reload stats from disk.
        """
        # Raw strings keep the regex backslashes out of reach of
        # Python's own string-escape processing.
        self.re_newrmgroup = re.compile(r'(?:new|rm)group\s')
        self.re_obsctl = re.compile(r'(?:sendsys|version|uuname)')
        # msgid pattern from a once-common spambot.
        self.re_none44 = re.compile(r'none\d+\.yet>')
        # There is a mad newgrouper who likes to meow.
        self.re_meow = re.compile(r"^Meow\!", re.M)
        # One of my silly addresses.
        self.re_fluffymorph = re.compile("andruQ@myremarQ.coM", re.I)

    def filter_before_reload(self):
        """Runs just before the filter gets reloaded.

        You can use this method to save state information to be
        restored by the __init__() method or down in the main module.
        """
        syslog('notice', "filter_before_reload executing...")

    def filter_close(self):
        """Runs when innd exits.

        You can use this method to save state information to be
        restored by the __init__() method or down in the main module.
        """
        syslog('notice', "filter_close running, bye!")

    def filter_messageid(self, msgid):
        """Filter articles just by their message IDs.

        This method interacts with the IHAVE and CHECK NNTP commands.
        If you return a non-empty string here, the offered article
        will be refused before you ever have to waste any bandwidth
        looking at it.  This is not foolproof, so you should do your
        ID checks both here and in filter_art.  (TAKETHIS does not
        offer the ID for examination, and a TAKETHIS isn't always
        preceded by a CHECK.)

        As shipped, the sample checks are switched off by the early
        return below, so every message ID is accepted.  With that
        return removed, the method returns a rejection string for a
        matching ID and None otherwise.
        """
        return ""               # deactivate the samples.

        if self.re_none44.search(msgid):
            return "But I don't like spam!"
        if msgid[0:8] == '<cancel.':
            return "I don't do cybercancels."

    def filter_art(self, art):
        """Decide whether to keep offered articles.

        art is a dictionary with a bunch of headers, the article's
        body, and innd's reckoning of the line count.  Items not
        in the article will have a value of None.

        The available headers are the ones listed near the top of
        innd/art.c.  At this writing, they are:

            Approved, Control, Date, Distribution, Expires, From,
            Lines, Message-ID, Newsgroups, Path, Reply-To, Sender,
            Subject, Supersedes, Bytes, Also-Control, References,
            Xref, Keywords, X-Trace, NNTP-Posting-Host, Followup-To,
            Organization, Content-Type, Content-Base,
            Content-Disposition, X-Newsreader, X-Mailer, X-Newsposter,
            X-Cancelled-By, X-Canceled-By and Cancel-Key.

        The body is the buffer in art['__BODY__'] and the INN-reckoned
        line count is held as an integer in art['__LINES__'].  (The
        Lines: header is often generated by the poster, and large
        differences can be a good indication of a corrupt article.)

        If you want to keep an article, return None or "".  If you
        want to reject, return a non-empty string.  The rejection
        string will appear in transfer and posting response banners,
        and local posters will see them if their messages are
        rejected.

        As shipped, the sample checks are switched off by the early
        return below, so every article is accepted.
        """
        return ""               # deactivate the samples.

        # catch bad IDs from articles fed with TAKETHIS but no CHECK.
        idcheck = self.filter_messageid(art[Message_ID])
        if idcheck:
            return idcheck

        # There are some control messages we don't want to process or
        # forward to other sites.
        try:
            if art[Control] is not None:
                if self.re_newrmgroup.match(art[Control]):
                    if self.re_meow.search(art[__BODY__]):
                        return "The fake tale meows again."
                    # NOTE(review): buffer() is a Python 2 builtin; this
                    # comparison needs another type under Python 3 --
                    # confirm what innd passes in before enabling there.
                    if art[Distribution] == buffer('mxyzptlk'):
                        return "Evil control message from the 10th dimension"
                if self.re_obsctl.match(art[Control]):
                    return "Obsolete control message"

            # If you don't know, you don't want to know.
            if self.re_fluffymorph.search(art[From]):
                return "No, you may NOT meow."
        except Exception:
            # Best effort: a missing header (None) or similar problem
            # is logged and the article is kept rather than letting the
            # exception escape into innd.  sys is imported at module
            # level further down this file; exc_info is a function and
            # must be called to obtain the (type, value, traceback)
            # tuple.
            syslog('n', str(sys.exc_info()[1]))

    def filter_mode(self, oldmode, newmode, reason):
        """Capture server events and do something useful.

        When the admin throttles or pauses innd (and lets it go
        again), this method will be called.  oldmode is the state we
        just left, and newmode is where we are going.  reason is
        usually just a comment string.

        The possible values of newmode and oldmode are the four
        strings 'running', 'paused', 'throttled' and 'unknown'.
        Actually 'unknown' shouldn't happen, it's there in case
        feeping creatures invade innd.
        """
        syslog('notice', 'state change from %s to %s - %s'
               % (oldmode, newmode, reason))



"""
Okay, that's the end of our class definition.  What follows is the
stuff you need to do to get it all working inside innd.
"""

# This import must succeed, or your filter won't work.  I'll repeat
# that: You MUST import INN.
from INN import *


#   Some of the stuff below is gratuitous, just demonstrating how the
#   INN.syslog call works.  That first thingy tells the Unix syslogger
#   what severity to use; you can abbreviate down to one letter and
#   it's case insensitive.  Available levels are (in increasing levels
#   of seriousness) Debug, Info, Notice, Warning, Err, Crit, and
#   Alert.  If you provide any other string, it will be defaulted to
#   Notice.  You'll find the entries in the same log files innd itself
#   uses, with an 'innd: python:' prefix.
#
#   The native Python syslog module seems to clash with INN, so use
#   INN's.  Oh yeah -- you may notice that stdout and stderr have been
#   redirected to /dev/null -- if you want to print stuff, open your
#   own files.

# Gratuitous example of reporting a failure through INN's syslog.
# "except ... as" is accepted by Python 2.6+ and Python 3 alike, and
# str() works whatever the exception's arguments are (indexing the
# exception object is not supported on Python 3).
try:
    import sys
except Exception as errmsg:
    syslog('Error', "import boo-boo: " + str(errmsg))


#     If you want to do something special when the server first starts
#     up, this is how to find out when it's time.

# The name 'spamfilter' is bound near the end of this file, so finding
# it among the names dir() reports here means this is not the first load.
if 'spamfilter' in dir():
    syslog('NoTicE', "I'm just reloading, so skip the formalities.")
else:
    syslog('n', "First load, so I can do initialization stuff.")
    #  This would be the spot to unpickle a saved hash, so that your
    #  hard-earned spam scores aren't lost whenever you shut down innd.


#  Finally, here is how we get our class on speaking terms with innd.
#  The hook is refreshed on every reload, so that you can change the
#  methods on a running server.  Don't forget to test your changes
#  before reloading!
spamfilter = InndFilter()
try:
    set_filter_hook(spamfilter)
    syslog('n', "spamfilter successfully hooked into INN")
except Exception as errmsg:
    # "except ... as" is accepted by Python 2.6+ and Python 3 alike;
    # %s formats the exception itself, so the message is logged even
    # when the exception carries no indexable arguments.
    syslog('e', "Cannot obtain INN hook for spamfilter: %s" % errmsg)

