chiark - git - mdw - exim-config/blob - spam.m4

   1 ### -*-m4-*-
   2 ###
   3 ### Spam filtering for distorted.org.uk Exim configuration
   4 ###
   5 ### (c) 2012 Mark Wooding
   6 ###
   7
   8 ###----- Licensing notice ---------------------------------------------------
   9 ###
  10 ### This program is free software; you can redistribute it and/or modify
  11 ### it under the terms of the GNU General Public License as published by
  12 ### the Free Software Foundation; either version 2 of the License, or
  13 ### (at your option) any later version.
  14 ###
  15 ### This program is distributed in the hope that it will be useful,
  16 ### but WITHOUT ANY WARRANTY; without even the implied warranty of
  17 ### MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  18 ### GNU General Public License for more details.
  19 ###
  20 ### You should have received a copy of the GNU General Public License
  21 ### along with this program; if not, write to the Free Software Foundation,
  22 ### Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
  23
  24 DIVERT(null)
  25 ###--------------------------------------------------------------------------
  26 ### Spam filtering.
  27
  28 ## The Exim documentation tells lies.
  29 ##
  30 ## : *${run{*<_command_>* *<_args_>*}{*<_string1_>*}{*<_string2_>*}}*
  31 ## :     The command and its arguments are first expanded separately, [...]
  32 ##
  33 ## They aren't.  The whole command-and-args are expanded together, and then
  34 ## split at unquoted spaces.  This unpleasant hack sorts out the mess.
     ##
     ## SHQUOTE(ARG) expands to ARG passed through Exim's `rxquote' operator
     ## and wrapped in double quotes.  It is applied to each variable argument
     ## of the `run' command line built further down -- presumably so that an
     ## argument containing spaces survives the splitting as a single word;
     ## verify against the Exim version in use.
  35 m4_define(<:SHQUOTE:>, <:"${rxquote:$1}":>)
  36
  37 ## Utilities for collecting spam limits.
     ##
     ## SPAMLIMIT_CHECK(VALUE) expands to an Exim `if' item which yields
     ## `spam_limit=VALUE' when VALUE is a decimal integer, optionally preceded
     ## by a minus sign, and the empty string otherwise.
  38 m4_define(<:SPAMLIMIT_CHECK:>,
  39         <:${if match{$1}{\N^-?[0-9]+$\N} {spam_limit=$1} {}}:>)
  40
     ## SPAMLIMIT_ROUTER(NAME) defines a `redirect' router called NAME which is
     ## only used while verifying addresses.  It is skipped for submissions
     ## (`$acl_c_mode' is `submission') and whenever `$address_data' already
     ## carries a `spam_limit' key; otherwise it redirects to `:unknown:'.
     ## NOTE(review): the effect of `:unknown:' is not visible in this file --
     ## check the redirect router documentation before relying on it.
  41 m4_define(<:SPAMLIMIT_ROUTER:>,
  42 <:$1:
  43         driver = redirect
  44         data = :unknown:
  45         verify_only = true
  46         condition = ${if !eq{$acl_c_mode}{submission}}
  47         condition = ${extract{spam_limit}{$address_data}{false}{true}}:>)
  48
     ## SPAMLIMIT_SET(ITEM) expands to an `address_data' setting for a router:
     ## the new value is the current `$address_data' (or nothing, if it is not
     ## defined) followed by ITEM, so earlier items are preserved.
  49 m4_define(<:SPAMLIMIT_SET:>,
  50         <:address_data = \
  51                 ${if def:address_data {$address_data}{}} \
  52                 $1:>)
  53
     ## SPAMLIMIT_LOOKUP(FILE, A, B, C) expands to two router settings: a
     ## condition that FILE exists, and an `address_data' setting (built with
     ## SPAMLIMIT_SET) which looks up the key `A@B/C' in FILE using
     ## `nwildlsearch' and keeps the result only if it passes SPAMLIMIT_CHECK.
     ## NOTE(review): A and B are presumably the local part and domain; what C
     ## stands for is not visible here -- check the callers.
  54 m4_define(<:SPAMLIMIT_LOOKUP:>,
  55         <:condition = ${if exists{$1}}
  56         SPAMLIMIT_SET(<:${lookup {$2@$3/$4} nwildlsearch {$1} \
  57                                {SPAMLIMIT_CHECK($value)}}:>):>)
  58
     ## SPAMLIMIT_USERV(USER, A, B, C) expands to an `address_data' setting
     ## (built with SPAMLIMIT_SET) which runs `userv', under a five-second
     ## `timeout', naming USER and the `exim-spam-limit' service and passing
     ## C, A and `@B' as further arguments; each variable argument goes
     ## through SHQUOTE.  The command's result is kept only if it passes
     ## SPAMLIMIT_CHECK.
     ## NOTE(review): USER is presumably the account providing the service,
     ## and A, B the local part and domain -- check the callers.
  59 m4_define(<:SPAMLIMIT_USERV:>,
  60         <:SPAMLIMIT_SET(<:${run {/usr/bin/timeout 5s \
  61                                         userv CONF_userv_opts \
  62                                         SHQUOTE($1) exim-spam-limit \
  63                                         SHQUOTE($4) \
  64                                         SHQUOTE($2) SHQUOTE(@$3)} \
  65                                 {SPAMLIMIT_CHECK($value)}}:>):>)
  66
     ## GET_ADDRDATA(KEY) expands to the front part of an Exim `extract' item:
     ## the operator name, KEY, and the string to search, which is
     ## `$address_data' or the empty string if that is not defined.  The caller
     ## supplies the surrounding dollar-brace, the result strings and the
     ## closing brace.
  67 m4_define(<:GET_ADDRDATA:>,
  68         <:extract{<:$1:>}{${if def:address_data{$address_data}{}}}:>)
  69
  70 SECTION(global, policy)m4_dnl
     ## Tell Exim how to reach the spam-scanning daemon: its address followed
     ## by its port, both taken from the site configuration.
  71 spamd_address = CONF_spamd_address CONF_spamd_port
  72
  73 SECTION(acl, rcpt-hooks)m4_dnl
  74         ## Do per-recipient spam-filter processing.  Every recipient has to
             ## get past the `rcpt_spam' ACL, which also settles the spam limit
             ## to be applied to the message as a whole.
  75         require  acl = rcpt_spam
  76
  77 SECTION(acl, misc)m4_dnl
     ## Helper ACL, called from both `rcpt_spam' and `data_spam'.  It accepts
     ## when spam processing should be skipped, and denies when it should go
     ## ahead.
  78 skip_spam_check:
  79
  80         ## If the client is trusted, or this is a new submission, don't
  81         ## bother with any of this.  We will have verified the sender
  82         ## fairly aggressively before granting this level of trust.
  83         accept   hosts = +trusted
  84         accept   condition = ${if eq{$acl_c_mode}{submission}}
  85
  86         ## If all domains have disabled spam checking then don't check.
             ## (The variable is only ever set, to `true', by `rcpt_spam'.)
  87         accept  !condition = $acl_c_spam_check_domain
  88
  89         ## Otherwise we should check.
  90         deny
  91
     ## Per-recipient processing.  This notes whether the recipient's domain
     ## wants spam checking at all, and makes sure that every recipient of a
     ## message shares one spam limit, deferring any recipient whose limit
     ## differs from the one already established.
  92 rcpt_spam:
  93
  94         ## If this is a virtual domain, and it says `spam-check=no', then we
  95         ## shouldn't check spam.  But we can't check domains at DATA time, so
  96         ## instead we must track whether all recipients have disabled
  97         ## checking.
             ##
             ## The first statement turns checking on for any domain which is
             ## not listed in `domains.conf'; the second turns it on for a listed
             ## domain unless its `spam-check' setting says otherwise.
  98         warn    !domains = ${if exists{CONF_sysconf_dir/domains.conf} \
  99                          {partial0-lsearch; CONF_sysconf_dir/domains.conf} \
 100                          {}}
 101                  set acl_c_spam_check_domain = true
 102         warn    !condition = $acl_c_spam_check_domain
 103                  condition = DOMKV(spam-check, {${expand:$value}}{true})
 104                  set acl_c_spam_check_domain = true
 105
 106         ## See if we should do this check.
 107         accept   acl = skip_spam_check
 108
 109         ## Always accept mail to `postmaster'.  Currently this is not
 110         ## negotiable; maybe a tweak can be added to `domains.conf' if
 111         ## necessary.
 112         accept   local_parts = postmaster
 113
 114         ## Collect the user's spam threshold from the `address_data'
 115         ## variable, where it was left by the `fetch_spam_limit' router
 116         ## during recipient verification.  (This just saves duplicating this
 117         ## enormous expression.)
             ##
             ## A missing limit comes back as `nil'.  Any value which is empty
             ## or contains a non-digit character is then replaced by the
             ## configured maximum.
             ## NOTE(review): that replacement also catches a leading minus
             ## sign, although the check applied when limits are collected
             ## accepts one -- confirm which behaviour is intended.
 118         warn     set acl_m_this_spam_limit = \
 119                         ${sg {${GET_ADDRDATA(spam_limit){$value}{nil}}} \
 120                              {^(|.*\\D.*)\$}{CONF_spam_max}}
 121
 122         ## If there's a spam limit already established, and it's different
 123         ## from this user's limit, then the sender will have to try this user
 124         ## again later.
 125         defer   !hosts = +trusted
 126                  message = "You'd better try this one later"
 127                  condition = ${if def:acl_m_spam_limit {true}{false}}
 128                  condition = ${if ={$acl_m_spam_limit} \
 129                                    {$acl_m_this_spam_limit} \
 130                                   {false}{true}}
 131
 132         ## There's no limit set yet, or the user's limit is the same as the
 133         ## existing one, or the client's local and we're not checking for
 134         ## spam anyway.  Whichever way, it's safe to set it now.
 135         warn     set acl_m_spam_limit = $acl_m_this_spam_limit
 136
 137         ## All done.
 138         accept
 139
 140 SECTION(acl, data-spam)m4_dnl
 141         ## Do spam checking.  The message content has to get past the
             ## `data_spam' ACL.
 142         require  acl = data_spam
 143
 144 SECTION(acl, misc)m4_dnl
     ## Content-time spam check.  Scan the message, reject it if its score is
     ## above the limit settled on by `rcpt_spam', and otherwise add headers
     ## reporting the result.
 145 data_spam:
 146
 147         ## See if we should do this check.
 148         accept   acl = skip_spam_check
 149
 150         ## Check header validity.
 151         require  verify = header_syntax
 152
 153         ## Scan the message for spam, as user `exim'.  The `:true' suffix
             ## makes this condition succeed whatever the score, so all it does
             ## is fill in the `$spam_...' variables; the comparison against the
             ## limit is made explicitly further down.
 154         warn     spam = exim:true
 155
 156         ## Format some reporting stuff.  This sets two variables which are
             ## used in the headers added further down:
             ## `acl_m_spam_limit_presentation', the limit as a decimal, and
             ## `acl_m_spam_tests', a comma-separated list of `RULE:score' items
             ## boiled down from `$spam_report'.
 157         warn
 158
 159                  ## Convert the limit (currently 10x fixed point) into a
 160                  ## decimal for presentation.
 161                  set acl_m_spam_limit_presentation = \
 162                         ${sg{$acl_m_spam_limit}{\N(\d)$\N}{.\$1}}
 163
 164                  ## Convert the report into something less obnoxious.  Plain
 165                  ## old SpamAssassin has an `X-Spam-Status' header which
 166                  ## lists the matched rules and provides some other basic
 167                  ## information.  Try to extract something similar from the
 168                  ## report.
 169                  ##
 170                  ## This is rather fiddly.
 171
 172                  ## Firstly, escape angle brackets (and `!', which serves as
 173                  ## the escape character), because we'll be using them for
                      ## our own purposes.
 174                  set acl_m_spam_tests = ${sg{$spam_report}{([!<>])}{!\$1}}
 175
 176                  ## Trim off the blurb paragraph and the preview.  The rest
 177                  ## should be fairly well behaved.  Wrap double angle-
 178                  ## brackets around the remainder; these can't appear in the
 179                  ## body because we escaped them all earlier.
 180                  set acl_m_spam_tests = \
 181                         ${sg{$acl_m_spam_tests} \
 182                             {\N^(?s).*\n Content analysis details:(.*)$\N} \
 183                             {<<\$1>>}}
 184
 185                  ## Extract the information about the matching rules and
 186                  ## their scores.  Leave `<<...>>' around everything else.
 187                  set acl_m_spam_tests = \
 188                         ${sg{$acl_m_spam_tests} \
 189                             {\N(?s)\n\s*(-?[\d.]+)\s+([-\w]+)\s\N} \
 190                             {>>\$2:\$1,<<}}
 191
 192                  ## Strip everything still in `<<...>>' pairs, including any
 193                  ## escaped characters inside.
 194                  set acl_m_spam_tests = \
 195                         ${sg{$acl_m_spam_tests}{\N(?s)<<([^!>]+|!.)*>>\N}{}}
 196
 197                  ## Trim off a trailing comma, and any whitespace after it.
                      ## The pattern is wrapped in `\N...\N' like the ones above:
                      ## in a bare expansion string Exim would reduce `\s' to a
                      ## plain `s' before the regular expression was compiled.
 198                  set acl_m_spam_tests = ${sg{$acl_m_spam_tests}{\N,\s*$\N}{}}
 199
 200                  ## Undo the escaping.
 201                  set acl_m_spam_tests = ${sg{$acl_m_spam_tests}{!(.)}{\$1}}
 202
 203         ## If we've decided to reject, then tell the sender to get knotted.
             ## The decision is made right here: the message is refused when its
             ## integer score is greater than the limit.
 204         deny     message = Tinned meat product detected ($spam_score)
 205                  condition = ${if >{$spam_score_int}{$acl_m_spam_limit} \
 206                                   {true}{false}}
 207
 208         ## Insert headers from the spam check now that we've decided to
 209         ## accept the message.  The first gives the score against the limit
             ## together with the score bar; the second repeats score and limit
             ## and lists the rules which matched.
 210         warn
 211                  ADD_HEADER(<:X-CONF_header_token-SpamAssassin-Score: \
 212                         $spam_score/$acl_m_spam_limit_presentation \
 213                         ($spam_bar):>)
 214                  ADD_HEADER(<:X-CONF_header_token-SpamAssassin-Status: \
 215                         score=$spam_score, \
 216                         limit=$acl_m_spam_limit_presentation, \n\t\
 217                         tests=$acl_m_spam_tests:>)
 218
 219         ## We're good.
 220         accept
 221
 222 DIVERT(null)
 223 ###----- That's all, folks --------------------------------------------------