## $Id: filter_innd.py 7903 2008-06-22 20:41:59Z iulius $
##
## This is a sample filter for the Python innd hook.
##
## See the INN Python Filtering and Authentication Hooks documentation
## for more information.
## You have access to the following methods from the module INN:
## - addhist(message-id)
## - article(message-id)
## - cancel(message-id)
## - havehist(message-id)
## - hashstring(string)
## - newsgroup(groupname)
## - set_filter_hook(instance)
## - syslog(level, message)
## This looks weird, but creating and interning these strings should
## let us get faster access to header keys (which innd also interns) by
## losing some strcmps under the covers.
## Python 3 moved intern() into the sys module.  Fall back to it there so
## that the keys below can be created on either major version.
try:
    intern
except NameError:
    from sys import intern

## One interned string per key of the dictionary handed to filter_art().
## Date, Face, From, Path and Xref are part of the header list documented
## in filter_art(), and From is used by the sample checks there, so they
## are defined here along with the others.
Also_Control = intern("Also-Control")
Approved = intern("Approved")
Bytes = intern("Bytes")
Cancel_Key = intern("Cancel-Key")
Cancel_Lock = intern("Cancel-Lock")
Content_Base = intern("Content-Base")
Content_Disposition = intern("Content-Disposition")
Content_Transfer_Encoding = intern("Content-Transfer-Encoding")
Content_Type = intern("Content-Type")
Control = intern("Control")
Date = intern("Date")
Date_Received = intern("Date-Received")
Distribution = intern("Distribution")
Expires = intern("Expires")
Face = intern("Face")
Followup_To = intern("Followup-To")
From = intern("From")
In_Reply_To = intern("In-Reply-To")
Injection_Date = intern("Injection-Date")
Injection_Info = intern("Injection-Info")
Keywords = intern("Keywords")
Lines = intern("Lines")
List_ID = intern("List-ID")
Message_ID = intern("Message-ID")
MIME_Version = intern("MIME-Version")
Newsgroups = intern("Newsgroups")
NNTP_Posting_Date = intern("NNTP-Posting-Date")
NNTP_Posting_Host = intern("NNTP-Posting-Host")
Organization = intern("Organization")
Originator = intern("Originator")
Path = intern("Path")
Posted = intern("Posted")
Posting_Version = intern("Posting-Version")
Received = intern("Received")
References = intern("References")
Relay_Version = intern("Relay-Version")
Reply_To = intern("Reply-To")
Sender = intern("Sender")
Subject = intern("Subject")
Supersedes = intern("Supersedes")
User_Agent = intern("User-Agent")
X_Auth = intern("X-Auth")
X_Canceled_By = intern("X-Canceled-By")
X_Cancelled_By = intern("X-Cancelled-By")
X_Complaints_To = intern("X-Complaints-To")
X_Face = intern("X-Face")
X_HTTP_UserAgent = intern("X-HTTP-UserAgent")
X_HTTP_Via = intern("X-HTTP-Via")
X_Mailer = intern("X-Mailer")
X_Modbot = intern("X-Modbot")
X_Modtrace = intern("X-Modtrace")
X_Newsposter = intern("X-Newsposter")
X_Newsreader = intern("X-Newsreader")
X_No_Archive = intern("X-No-Archive")
X_Original_Message_ID = intern("X-Original-Message-ID")
X_Original_Trace = intern("X-Original-Trace")
X_Originating_IP = intern("X-Originating-IP")
X_PGP_Key = intern("X-PGP-Key")
X_PGP_Sig = intern("X-PGP-Sig")
X_Poster_Trace = intern("X-Poster-Trace")
X_Postfilter = intern("X-Postfilter")
X_Proxy_User = intern("X-Proxy-User")
X_Submissions_To = intern("X-Submissions-To")
X_Trace = intern("X-Trace")
X_Usenet_Provider = intern("X-Usenet-Provider")
Xref = intern("Xref")

## Pseudo-headers: the article body and innd's own line count.
__BODY__ = intern("__BODY__")
__LINES__ = intern("__LINES__")
## The line-count key used to be bound to the misspelled name _LINES__;
## keep that name working for any code that already refers to it.
_LINES__ = __LINES__
class InndFilter:
    """Provide filtering callbacks to innd.

    The spam checks in filter_messageid() and filter_art() are only
    samples.  They stay dormant, and everything is accepted, unless the
    samples_enabled attribute is set to a true value.
    """

    # Master switch for the sample checks.  False keeps the historical
    # behaviour of accepting every Message-ID and every article.
    samples_enabled = False

    def __init__(self):
        """This runs every time the filter is loaded or reloaded.

        This is a good place to initialize variables and precompile
        regular expressions, or maybe reload stats from disk.
        """
        # Raw strings keep the regex escapes (\s, \d) away from Python's
        # own string-escape processing.
        self.re_newrmgroup = re.compile(r'(?:new|rm)group\s')
        self.re_obsctl = re.compile(r'(?:sendsys|version|uuname)')
        # Message-ID pattern from a once-common spambot.
        self.re_none44 = re.compile(r'none\d+\.yet>')
        # There is a mad newgrouper who likes to meow.
        self.re_meow = re.compile(r"^Meow\!", re.M)
        # One of my silly addresses.
        self.re_fluffymorph = re.compile(r"andruQ@myremarQ.coM", re.I)

    def filter_before_reload(self):
        """Runs just before the filter gets reloaded.

        You can use this method to save state information to be
        restored by the __init__() method or down in the main module.
        """
        syslog('notice', "filter_before_reload executing...")

    def filter_close(self):
        """Runs when innd exits.

        You can use this method to save state information to be
        restored by the __init__() method or down in the main module.
        """
        syslog('notice', "filter_close running, bye!")

    def filter_messageid(self, msgid):
        """Filter articles just by their Message-IDs.

        This method interacts with the IHAVE and CHECK NNTP commands.
        If you return a non-empty string here, the offered article
        will be refused before you ever have to waste any bandwidth
        looking at it.  This is not foolproof, so you should do your
        ID checks both here and in filter_art.  (TAKETHIS does not
        offer the ID for examination, and a TAKETHIS isn't always
        preceded by a CHECK.)

        Returns "" to accept the offer, or the refusal reason.
        """
        if not self.samples_enabled:
            return ""               # The samples are deactivated.

        if self.re_none44.search(msgid):
            return "But I don't like spam!"
        # Compare a slice rather than calling a str method: filter_art()
        # passes the header value straight through, and that value is
        # not guaranteed to be a plain string.
        if msgid[0:8] == '<cancel.':
            return "I don't do cybercancels."
        return ""

    def filter_art(self, art):
        """Decide whether to keep offered articles.

        art is a dictionary with a bunch of headers, the article's
        body, and innd's reckoning of the line count.  Items not
        in the article will have a value of None.

        The available headers are the ones listed near the top of
        innd/art.c.  At this writing, they are:

            Also-Control, Approved, Bytes, Cancel-Key, Cancel-Lock,
            Content-Base, Content-Disposition, Content-Transfer-Encoding,
            Content-Type, Control, Date, Date-Received, Distribution, Expires,
            Face, Followup-To, From, In-Reply-To, Injection-Date, Injection-Info,
            Keywords, Lines, List-ID, Message-ID, MIME-Version, Newsgroups,
            NNTP-Posting-Date, NNTP-Posting-Host, Organization, Originator,
            Path, Posted, Posting-Version, Received, References, Relay-Version,
            Reply-To, Sender, Subject, Supersedes, User-Agent,
            X-Auth, X-Canceled-By, X-Cancelled-By, X-Complaints-To, X-Face,
            X-HTTP-UserAgent, X-HTTP-Via, X-Mailer, X-Modbot, X-Modtrace,
            X-Newsposter, X-Newsreader, X-No-Archive, X-Original-Message-ID,
            X-Original-Trace, X-Originating-IP, X-PGP-Key, X-PGP-Sig,
            X-Poster-Trace, X-Postfilter, X-Proxy-User, X-Submissions-To,
            X-Trace, X-Usenet-Provider, Xref.

        The body is the buffer in art['__BODY__'] and the INN-reckoned
        line count is held as an integer in art['__LINES__'].  (The
        Lines: header is often generated by the poster, and large
        differences can be a good indication of a corrupt article.)

        If you want to keep an article, return None or "".  If you
        want to reject, return a non-empty string.  The rejection
        string will appear in transfer and posting response banners,
        and local posters will see them if their messages are
        rejected.
        """
        if not self.samples_enabled:
            return ""               # The samples are deactivated.

        # Catch bad Message-IDs from articles fed with TAKETHIS but no CHECK.
        idcheck = self.filter_messageid(art[Message_ID])
        if idcheck:
            return idcheck

        try:
            # There are some control messages we don't want to process or
            # forward to other sites.
            if art[Control] is not None:
                if self.re_newrmgroup.match(art[Control]):
                    if self.re_meow.search(art[__BODY__]):
                        return "The fake tale meows again."
                    if art[Distribution] == buffer('mxyzptlk'):
                        return "Evil control message from the 10th dimension"
                if self.re_obsctl.match(art[Control]):
                    return "Obsolete control message"

            # If you don't know, you don't want to know.
            if self.re_fluffymorph.search(art[From]):
                return "No, you may NOT meow."
        except Exception:
            # A failing sample check must not reject the article: log the
            # error and fall through to acceptance.  exc_info is a
            # function and has to be called before it can be indexed.
            syslog('n', str(sys.exc_info()[1]))
        return ""

    def filter_mode(self, oldmode, newmode, reason):
        """Capture server events and do something useful.

        When the admin throttles or pauses innd (and lets it go
        again), this method will be called.  oldmode is the state we
        just left, and newmode is where we are going.  reason is
        usually just a comment string.

        The possible values of newmode and oldmode are the five
        strings 'running', 'paused', 'throttled', 'shutdown' and
        'unknown'.  Actually 'unknown' shouldn't happen; it's there
        in case feeping creatures invade innd.
        """
        syslog('notice', 'state change from %s to %s - %s'
               % (oldmode, newmode, reason))
## Okay, that's the end of our class definition.  What follows is the
## stuff you need to do to get it all working inside innd.
## This import must succeed, or your filter won't work.  I'll repeat
## that: You MUST import INN.
## Some of the stuff below is gratuitous, just demonstrating how the
## INN.syslog call works.  That first thingy tells the Unix syslogger
## what severity to use; you can abbreviate down to one letter and
## it's case insensitive.  Available levels are (in increasing levels
## of seriousness) Debug, Info, Notice, Warning, Err, Crit, and
## Alert.  If you provide any other string, it will be defaulted to
## Notice.  You'll find the entries in the same log files innd itself
## uses, with an 'innd: python:' prefix.
## The native Python syslog module seems to clash with INN, so use
## INN's.  Oh yeah -- you may notice that stdout and stderr have been
## redirected to /dev/null -- if you want to print stuff, open your
## own files.
try:
    from INN import *
except Exception as errmsg:
    # syslog() is itself provided by the INN module, so it is only
    # available here if an earlier load of this file already imported it.
    # Calling it unconditionally would replace the real import error
    # with a NameError.
    if 'syslog' in dir():
        syslog('Error', "import boo-boo: %s" % (errmsg,))
## If you want to do something special when the server first starts
## up, this is how to find out when it's time.
# The name 'spamfilter' is bound at the bottom of this file, so it is
# only present in the namespace when the file has been loaded before.
if 'spamfilter' not in dir():
    syslog('n', "First load, so I can do initialization stuff.")
    # You could unpickle a saved hash here, so that your hard-earned
    # spam scores aren't lost whenever you shut down innd.
else:
    syslog('NoTicE', "I'm just reloading, so skip the formalities.")
## Finally, here is how we get our class on speaking terms with innd.
## The hook is refreshed on every reload, so that you can change the
## methods on a running server.  Don't forget to test your changes
## before reloading!
# Build a fresh filter object and hand it to innd.
spamfilter = InndFilter()
try:
    set_filter_hook(spamfilter)
    syslog('n', "spamfilter successfully hooked into INN")
except Exception as errmsg:
    # Format the exception itself rather than errmsg[0]: indexing fails
    # when the exception carries no arguments, which would hide the
    # real reason the hook could not be installed.
    syslog('e', "Cannot obtain INN hook for spamfilter: %s" % (errmsg,))