[exim-config] / spam.m4

### -*-m4-*-
###
### Spam filtering for distorted.org.uk Exim configuration
###
### (c) 2012 Mark Wooding
###

###----- Licensing notice ---------------------------------------------------
###
### This program is free software; you can redistribute it and/or modify
### it under the terms of the GNU General Public License as published by
### the Free Software Foundation; either version 2 of the License, or
### (at your option) any later version.
###
### This program is distributed in the hope that it will be useful,
### but WITHOUT ANY WARRANTY; without even the implied warranty of
### MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
### GNU General Public License for more details.
###
### You should have received a copy of the GNU General Public License
### along with this program; if not, write to the Free Software Foundation,
### Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.

DIVERT(null)
###--------------------------------------------------------------------------
### Spam filtering.

## The Exim documentation tells lies.
##
## : *${run{*<_command_>* *<_args_>*}{*<_string1_>*}{*<_string2_>*}}*
## :     The command and its arguments are first expanded separately, [...]
##
## They aren't.  The whole command-and-args are expanded together, and then
## split at unquoted spaces.  This unpleasant hack sorts out the mess.
##
## The macro takes one argument, an Exim expansion.  The result is a
## double-quoted word whose contents have been passed through `rxquote', so
## that every non-alphanumeric character in the expanded value comes out
## with a backslash in front of it.  The intention is that the value
## reaches the command as exactly one argument, whatever spaces or quotes
## it happens to contain.
m4_define(<:SHQUOTE:>, <:"${rxquote:$1}":>)

## Utilities for collecting spam limits.
##
## Validate a candidate spam limit.  The single argument is an Exim
## expansion.  If its value is a decimal integer, optionally preceded by a
## minus sign, then the result is the `address_data' item
## `spam_limit=VALUE'; otherwise the result is empty.
m4_define(<:SPAMLIMIT_CHECK:>,
	<:${if match{$1}{\N^-?[0-9]+$\N} {spam_limit=$1} {}}:>)

## Emit the opening of a router definition.  The single argument is the
## router's name.  The router uses the `redirect' driver with the fixed
## data `:unknown:' and is marked `verify_only'.  It is skipped when
## `$acl_c_mode' is `submission', and also when `$address_data' already
## holds a `spam_limit' item, so that once a limit has been found no later
## router of this kind runs for the same address.
##
## The expansion stops after these options.  Presumably the caller follows
## the macro call with further options -- in particular an `address_data'
## setting -- but the call sites aren't in this file, so that is an
## assumption to confirm.
m4_define(<:SPAMLIMIT_ROUTER:>,
<:$1:
	driver = redirect
	data = :unknown:
	verify_only = true
	condition = ${if !eq{$acl_c_mode}{submission}}
	condition = ${extract{spam_limit}{$address_data}{false}{true}}:>)

## Emit an `address_data' setting which extends whatever value is already
## there (or starts from nothing if the variable is unset).  Arguments:
##
##   1	an expansion yielding the text to append;
##
##   2	optional further text, emitted on a continuation line of its own
##	between the existing value and the first argument; if this
##	argument is empty then nothing extra is emitted.
m4_define(<:SPAMLIMIT_SET:>,
	<:address_data = \
		${if def:address_data {$address_data}{}} \
		m4_ifelse(<:$2:>, <::>, <::>, <:$2 \
		:>)$1:>)

## Emit router options which fetch a spam limit from a lookup file.
## Arguments:
##
##   1	the name of the file to search;
##
##   2, 3, 4
##	the pieces of the lookup key, which is assembled as
##	`ARG2@ARG3/ARG4';
##
##   5	optional extra text to record in `address_data' alongside the
##	limit.
##
## The router is skipped unless the file exists.  The key is looked up with
## `nwildlsearch'; the value found is recorded as the spam limit only if it
## is a valid integer.  If the lookup finds nothing then no limit is
## recorded.
m4_define(<:SPAMLIMIT_LOOKUP:>,
	<:condition = ${if exists{$1}}
	SPAMLIMIT_SET(<:${lookup {$2@$3/$4} nwildlsearch {$1} \
			       {SPAMLIMIT_CHECK(<:$value:>)}}:>, <:$5:>):>)

## Emit router options which fetch a spam limit by running the
## `exim-spam-limit' service through `userv', giving up after five seconds.
## Arguments:
##
##   1	the first non-option argument to `userv' (per userv's usage this
##	is the user who provides the service);
##
##   2, 3, 4
##	passed to the service as its arguments, in the order ARG4, ARG2,
##	`@ARG3';
##
##   5	optional extra text to record in `address_data' alongside the
##	limit.
##
## Each argument is individually quoted so that it reaches the command as
## a single word.  The command's output is recorded as the spam limit only
## if it is a valid integer.
m4_define(<:SPAMLIMIT_USERV:>,
	<:SPAMLIMIT_SET(<:${run {/usr/bin/timeout 5s \
					/usr/bin/userv CONF_userv_opts \
					SHQUOTE($1) exim-spam-limit \
					SHQUOTE($4) \
					SHQUOTE($2) SHQUOTE(@$3)} \
				{SPAMLIMIT_CHECK(<:$value:>)}}:>, <:$5:>):>)

## Expand to the front part of an `extract' expansion item which picks the
## field named by the argument out of `$address_data', or out of the empty
## string if that variable isn't set.  The caller supplies the surrounding
## `${' and `}' and the result strings which follow the macro call.
m4_define(<:GET_ADDRDATA:>,
	<:extract{<:$1:>}{${if def:address_data{$address_data}{}}}:>)

SECTION(global, policy)m4_dnl
## Where the `spam' ACL condition finds the scanning daemon.  The two
## pieces of the value come from the site configuration.
spamd_address = CONF_spamd_address CONF_spamd_port

SECTION(acl, rcpt-hooks)m4_dnl
	## Do per-recipient spam-filter processing.  The `rcpt_spam' ACL
	## must accept; if it doesn't, its verdict is applied to this
	## recipient.
	require	 acl = rcpt_spam

SECTION(acl, misc)m4_dnl
skip_spam_check:

	## Helper ACL which decides whether spam checking can be skipped.
	## It's meant to be called as `accept acl = skip_spam_check': it
	## accepts if the check should be skipped, and denies if the check
	## should go ahead.

	## If the client is trusted, or this is a new submission, don't
	## bother with any of this.  We will have verified the sender
	## fairly aggressively before granting this level of trust.
	accept	 hosts = +trusted
	accept	 condition = ${if eq{$acl_c_mode}{submission}}

	## If all domains have disabled spam checking then don't check.
	## The flag is set by `rcpt_spam' as soon as it sees a recipient
	## whose domain wants checking; if it's still unset then the
	## negated condition holds and we skip.
	accept	!condition = $acl_c_spam_check_domain

	## Otherwise we should check.
	deny

rcpt_spam:

	## Per-recipient spam processing.  This notes whether any
	## recipient's domain wants spam checking, collects the recipient's
	## spam limit, and insists that all the recipients of a message
	## share one limit, because the message is scanned and judged only
	## once, at DATA time.

	## If this is a virtual domain, and it says `spam-check=no', then we
	## shouldn't check spam.  But we can't check domains at DATA time, so
	## instead we must track whether all recipients have disabled
	## checking.
	##
	## The first statement deals with domains which aren't listed in
	## `domains.conf' at all (or all domains, if there's no such file):
	## they are always checked.  The second deals with the others: they
	## are checked unless their `spam-check' setting says otherwise; the
	## default, if there's no setting, is to check.
	##
	## NOTE(review): the flag is an `acl_c' variable, which lasts for
	## the whole connection, so once set it stays set for later messages
	## on the same connection.  Confirm that this is intended.
	warn	!domains = ${if exists{CONF_sysconf_dir/domains.conf} \
			 {partial0-lsearch; CONF_sysconf_dir/domains.conf} \
			 {}}
		 set acl_c_spam_check_domain = true
	warn	!condition = $acl_c_spam_check_domain
		 condition = DOMKV(spam-check, {${expand:$value}}{true})
		 set acl_c_spam_check_domain = true

	## See if we should do this check.
	accept	 acl = skip_spam_check

	## Always accept mail to `postmaster'.  Currently this is not
	## negotiable; maybe a tweak can be added to `domains.conf' if
	## necessary.  Note that no spam limit is recorded for such a
	## recipient.
	accept	 local_parts = postmaster

	## Collect the user's spam threshold from the `address_data'
	## variable, where it was left by the `fetch_spam_limit' router
	## during recipient verification.  (This just saves duplicating this
	## enormous expression.)
	##
	## Anything which isn't an optionally signed decimal integer --
	## including the `nil' we get when there's no limit at all -- is
	## replaced by the configured maximum.  Negative limits must be let
	## through here: the limit-collecting routers accept them, and the
	## comparisons and formatting later on cope with them.  The pattern
	## is wrapped in `\N' so that the expander leaves it alone.
	warn	 set acl_m_this_spam_limit = \
			${sg {${GET_ADDRDATA(spam_limit){$value}{nil}}} \
			     {\N^(?s)(?!-?\d+\z).*$\N}{CONF_spam_max}}

	## If the router left a `user' item in `address_data', then add an
	## entry `USER=LOCAL@DOMAIN' to the colon-separated list of users
	## for this message, for the benefit of the rejection log.  Any `!'
	## or `:' in the address is escaped by putting a `!' in front.
	warn	 condition = ${GET_ADDRDATA(user){true}{false}}
		 set acl_m_spam_users = \
			${if def:acl_m_spam_users {$acl_m_spam_users::}{}}\
			${GET_ADDRDATA(user) \
				{$value=${sg{$local_part@$domain}\
					    {([!:])}{!\$1}}} \
				fail}

	## If there's a spam limit already established, and it's different
	## from this user's limit, then the sender will have to try this user
	## again later.
	defer	!hosts = +trusted
		 message = "You'd better try this one later"
		 condition = ${if def:acl_m_spam_limit {true}{false}}
		 condition = ${if ={$acl_m_spam_limit} \
				   {$acl_m_this_spam_limit} \
				  {false}{true}}

	## There's no limit set yet, or the user's limit is the same as the
	## existing one, or the client's local and we're not checking for
	## spam anyway.  Whichever way, it's safe to set it now.
	warn	 set acl_m_spam_limit = $acl_m_this_spam_limit

	## All done.
	accept

SECTION(acl, data-hooks)m4_dnl
	## Do spam checking.  The `data_spam' ACL must accept; if it
	## doesn't, its verdict is applied to the whole message.
	require	 acl = data_spam

SECTION(acl, misc)m4_dnl
data_spam:

	## Whole-message spam processing, run once the message body has
	## arrived.  The message is scanned, and its score compared against
	## the limit which `rcpt_spam' established for its recipients.  If
	## the message survives, it gets headers describing the result.

	## See if we should do this check.
	accept	 acl = skip_spam_check

	## Check header validity.
	require	 verify = header_syntax

	## Scan the message for spam.  The `:true' suffix makes the
	## condition succeed whatever the score, so this only serves to set
	## the `spam' variables; the comparison against the limit is made
	## further down.
	warn	 spam = exim:true

	## Format some reporting stuff.
	warn

		 ## Convert the limit (currently 10x fixed point) into a
		 ## decimal for presentation.
		 set acl_m_spam_limit_presentation = \
			${sg{$acl_m_spam_limit}{\N(\d)$\N}{.\$1}}

		 ## Convert the report into something less obnoxious.  Plain
		 ## old SpamAssassin has an `X-Spam-Status' header which
		 ## lists the matched rules and provides some other basic
		 ## information.  Try to extract something similar from the
		 ## report.
		 ##
		 ## This is rather fiddly.

		 ## Firstly, escape angle brackets, because we'll be using
		 ## them for our own purposes.  The escape character is `!',
		 ## so that needs escaping too.
		 set acl_m_spam_tests = ${sg{$spam_report}{([!<>])}{!\$1}}

		 ## Trim off the blurb paragraph and the preview.  The rest
		 ## should be fairly well behaved.  Wrap double angle-
		 ## brackets around the remainder; these can't appear in the
		 ## body because we escaped them all earlier.
		 set acl_m_spam_tests = \
			${sg{$acl_m_spam_tests} \
			    {\N^(?s).*\n Content analysis details:(.*)$\N} \
			    {<<\$1>>}}

		 ## Extract the information about the matching rules and
		 ## their scores.  Leave `<<...>>' around everything else.
		 set acl_m_spam_tests = \
			${sg{$acl_m_spam_tests} \
			    {\N(?s)\n\s*(-?[\d.]+)\s+([-\w]+)\s\N} \
			    {>>\$2:\$1,<<}}

		 ## Strip everything still in `<<...>>' pairs, including any
		 ## escaped characters inside.
		 set acl_m_spam_tests = \
			${sg{$acl_m_spam_tests}{\N(?s)<<([^!>]+|!.)*>>\N}{}}

		 ## Trim off a trailing comma, and any whitespace after it.
		 ## The pattern must be protected by `\N': otherwise the
		 ## expander would strip the backslash and leave a plain
		 ## letter `s' for the regex engine to match.
		 set acl_m_spam_tests = ${sg{$acl_m_spam_tests}{\N,\s*$\N}{}}

		 ## Undo the escaping.
		 set acl_m_spam_tests = ${sg{$acl_m_spam_tests}{!(.)}{\$1}}

	## If we've decided to reject, then leave a dropping in the log file
	## so that users can analyse rejections for incoming messages, and
	## tell the sender to get knotted.
	##
	## Don't pass judgement at all if no limit was established.  That
	## happens when every recipient was `postmaster', whose mail is
	## always to be accepted; an unset limit would otherwise count as
	## zero in the comparison, and we'd bounce anything with a positive
	## score.
	deny	 message = Tinned meat product detected ($spam_score)
		 log_message = Spam rejection \
			score=$spam_score \
			limit=$acl_m_spam_limit_presentation \
			tests=$acl_m_spam_tests \
			users=$acl_m_spam_users
		 condition = ${if def:acl_m_spam_limit {true}{false}}
		 condition = ${if >{$spam_score_int}{$acl_m_spam_limit} \
				  {true}{false}}

	## Insert headers from the spam check now that we've decided to
	## accept the message.
	warn
		 ADD_HEADER(<:X-CONF_header_token-SpamAssassin-Score: \
			$spam_score/$acl_m_spam_limit_presentation \
			($spam_bar):>)
		 ADD_HEADER(<:X-CONF_header_token-SpamAssassin-Status: \
			score=$spam_score, \
			limit=$acl_m_spam_limit_presentation, \n\t\
			tests=$acl_m_spam_tests:>)

	## We're good.
	accept

DIVERT(null)
###----- That's all, folks --------------------------------------------------
Commit	Line	Data
185b5456 MW	1	### --m4--
	2	###
	3	### Spam filtering for distorted.org.uk Exim configuration
	4	###
	5	### (c) 2012 Mark Wooding
	6	###
	7
	8	###----- Licensing notice ---------------------------------------------------
	9	###
	10	### This program is free software; you can redistribute it and/or modify
	11	### it under the terms of the GNU General Public License as published by
	12	### the Free Software Foundation; either version 2 of the License, or
	13	### (at your option) any later version.
	14	###
	15	### This program is distributed in the hope that it will be useful,
	16	### but WITHOUT ANY WARRANTY; without even the implied warranty of
	17	### MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	18	### GNU General Public License for more details.
	19	###
	20	### You should have received a copy of the GNU General Public License
	21	### along with this program; if not, write to the Free Software Foundation,
	22	### Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
	23
	24	DIVERT(null)
	25	###--------------------------------------------------------------------------
	26	### Spam filtering.
	27
953ae20e MW	28	## The Exim documentation tells lies.
	29	##
	30	## : ${run{<_command_>* <_args_>}{<_string1_>}{<_string2_>}}*
	31	## : The command and its arguments are first expanded separately, [...]
	32	##
	33	## They aren't. The whole command-and-args are expanded together, and then
	34	## split at unquoted spaces. This unpleasant hack sorts out the mess.
	35	m4_define(<:SHQUOTE:>, <:"${rxquote:$1}":>)
	36
02af00e7 MW	37	## Utilities for collecting spam limits.
	38	m4_define(<:SPAMLIMIT_CHECK:>,
	39	<:${if match{$1}{\N^-?[0-9]+$\N} {spam_limit=$1} {}}:>)
	40
	41	m4_define(<:SPAMLIMIT_ROUTER:>,
	42	<:$1:
	43	driver = redirect
	44	data = :unknown:
	45	verify_only = true
	46	condition = ${if !eq{$acl_c_mode}{submission}}
	47	condition = ${extract{spam_limit}{$address_data}{false}{true}}:>)
	48
	49	m4_define(<:SPAMLIMIT_SET:>,
	50	<:address_data = \
	51	${if def:address_data {$address_data}{}} \
33dbcec9 MW	52	m4_ifelse(<:$2:>, <::>, <::>, <:$2 \
33dbcec9 MW	53	:>)$1:>)
02af00e7 MW	54
	55	m4_define(<:SPAMLIMIT_LOOKUP:>,
	56	<:condition = ${if exists{$1}}
d057af07	57	SPAMLIMIT_SET(<:${lookup {$2@$3/$4} nwildlsearch {$1} \
33dbcec9	58	{SPAMLIMIT_CHECK(<:$value:>)}}:>, <:$5:>):>)
02af00e7 MW	59
	60	m4_define(<:SPAMLIMIT_USERV:>,
	61	<:SPAMLIMIT_SET(<:${run {/usr/bin/timeout 5s \
ab42b10f	62	/usr/bin/userv CONF_userv_opts \
02af00e7	63	SHQUOTE($1) exim-spam-limit \
d057af07 MW	64	SHQUOTE($4) \
d057af07 MW	65	SHQUOTE($2) SHQUOTE(@$3)} \
33dbcec9	66	{SPAMLIMIT_CHECK(<:$value:>)}}:>, <:$5:>):>)
02af00e7	67
043a914f MW	68	m4_define(<:GET_ADDRDATA:>,
	69	<:extract{<:$1:>}{${if def:address_data{$address_data}{}}}:>)
	70
185b5456 MW	71	SECTION(global, policy)m4_dnl
	72	spamd_address = CONF_spamd_address CONF_spamd_port
	73
185b5456 MW	74	SECTION(acl, rcpt-hooks)m4_dnl
	75	## Do per-recipient spam-filter processing.
	76	require acl = rcpt_spam
	77
	78	SECTION(acl, misc)m4_dnl
b8b0f13c	79	skip_spam_check:
185b5456	80
b8b0f13c MW	81	## If the client is trusted, or this is a new submission, don't
	82	## bother with any of this. We will have verified the sender
	83	## fairly aggressively before granting this level of trust.
185b5456	84	accept hosts = +trusted
b8b0f13c MW	85	accept condition = ${if eq{$acl_c_mode}{submission}}
b8b0f13c MW	86
87afdfcd MW	87	## If all domains have disabled spam checking then don't check.
87afdfcd MW	88	accept !condition = $acl_c_spam_check_domain
48507a38	89
b8b0f13c MW	90	## Otherwise we should check.
	91	deny
	92
	93	rcpt_spam:
	94
87afdfcd MW	95	## If this is a virtual domain, and it says `spam-check=no', then we
	96	## shouldn't check spam. But we can't check domains at DATA time, so
	97	## instead we must track whether all recipients have disabled
	98	## checking.
	99	warn !domains = ${if exists{CONF_sysconf_dir/domains.conf} \
	100	{partial0-lsearch; CONF_sysconf_dir/domains.conf} \
	101	{}}
	102	set acl_c_spam_check_domain = true
	103	warn !condition = $acl_c_spam_check_domain
	104	condition = DOMKV(spam-check, {${expand:$value}}{true})
	105	set acl_c_spam_check_domain = true
	106
b8b0f13c MW	107	## See if we should do this check.
b8b0f13c MW	108	accept acl = skip_spam_check
185b5456	109
aa935c91 MW	110	## Always accept mail to `postmaster'. Currently this is not
	111	## negotiable; maybe a tweak can be added to `domains.conf' if
	112	## necessary.
	113	accept local_parts = postmaster
	114
185b5456 MW	115	## Collect the user's spam threshold from the `address_data'
	116	## variable, where it was left by the `fetch_spam_limit' router
	117	## during recipient verification. (This just saves duplicating this
	118	## enormous expression.)
	119	warn set acl_m_this_spam_limit = \
043a914f	120	${sg {${GET_ADDRDATA(spam_limit){$value}{nil}}} \
185b5456 MW	121	{^(\|.\\D.)\$}{CONF_spam_max}}
185b5456 MW	122
33dbcec9 MW	123	warn condition = ${GET_ADDRDATA(user){true}{false}}
	124	set acl_m_spam_users = \
	125	${if def:acl_m_spam_users {$acl_m_spam_users::}{}}\
	126	${GET_ADDRDATA(user) \
	127	{$value=${sg{$local_part@$domain}\
	128	{([!:])}{!\$1}}} \
	129	fail}
	130
185b5456 MW	131	## If there's a spam limit already established, and it's different
	132	## from this user's limit, then the sender will have to try this user
	133	## again later.
	134	defer !hosts = +trusted
	135	message = "You'd better try this one later"
	136	condition = ${if def:acl_m_spam_limit {true}{false}}
	137	condition = ${if ={$acl_m_spam_limit} \
	138	{$acl_m_this_spam_limit} \
	139	{false}{true}}
	140
	141	## There's no limit set yet, or the user's limit is the same as the
	142	## existing one, or the client's local and we're not checking for
	143	## spam anyway. Whichever way, it's safe to set it now.
	144	warn set acl_m_spam_limit = $acl_m_this_spam_limit
	145
	146	## All done.
	147	accept
	148
06dfc2db	149	SECTION(acl, data-hooks)m4_dnl
185b5456 MW	150	## Do spam checking.
	151	require acl = data_spam
	152
	153	SECTION(acl, misc)m4_dnl
	154	data_spam:
	155
b8b0f13c MW	156	## See if we should do this check.
b8b0f13c MW	157	accept acl = skip_spam_check
185b5456	158
09ca3919 MW	159	## Check header validity.
	160	require verify = header_syntax
	161
185b5456	162	## Check the message for spam, comparing to the configured limit.
3e96b3ae	163	warn spam = exim:true
185b5456	164
3e96b3ae	165	## Format some reporting stuff.
185b5456	166	warn
a882a548	167
185b5456 MW	168	## Convert the limit (currently 10x fixed point) into a
	169	## decimal for presentation.
	170	set acl_m_spam_limit_presentation = \
	171	${sg{$acl_m_spam_limit}{\N(\d)$\N}{.\$1}}
	172
	173	## Convert the report into something less obnoxious. Plain
	174	## old SpamAssassin has an `X-Spam-Status' header which
	175	## lists the matched rules and provides some other basic
	176	## information. Try to extract something similar from the
	177	## report.
	178	##
	179	## This is rather fiddly.
	180
	181	## Firstly, escape angle brackets, because we'll be using
	182	## them for our own purposes.
	183	set acl_m_spam_tests = ${sg{$spam_report}{([!<>])}{!\$1}}
	184
	185	## Trim off the blurb paragraph and the preview. The rest
	186	## should be fairly well behaved. Wrap double angle-
	187	## brackets around the remainder; these can't appear in the
	188	## body because we escaped them all earlier.
	189	set acl_m_spam_tests = \
	190	${sg{$acl_m_spam_tests} \
	191	{\N^(?s).\n Content analysis details:(.)$\N} \
	192	{<<\$1>>}}
	193
	194	## Extract the information about the matching rules and
	195	## their scores. Leave `<<...>>' around everything else.
	196	set acl_m_spam_tests = \
	197	${sg{$acl_m_spam_tests} \
4ff4d304	198	{\N(?s)\n\s*(-?[\d.]+)\s+([-\w]+)\s\N} \
185b5456 MW	199	{>>\$2:\$1,<<}}
	200
	201	## Strip everything still in `<<...>>' pairs, including any
	202	## escaped characters inside.
	203	set acl_m_spam_tests = \
	204	${sg{$acl_m_spam_tests}{\N(?s)<<([^!>]+\|!.)*>>\N}{}}
	205
	206	## Trim off a trailing comma.
	207	set acl_m_spam_tests = ${sg{$acl_m_spam_tests}{,\s*\$}{}}
	208
	209	## Undo the escaping.
	210	set acl_m_spam_tests = ${sg{$acl_m_spam_tests}{!(.)}{\$1}}
	211
33dbcec9 MW	212	## If we've decided to reject, then leave a dropping in the log file
	213	## so that users can analyse rejections for incoming messages, and
	214	## tell the sender to get knotted.
3e96b3ae	215	deny message = Tinned meat product detected ($spam_score)
33dbcec9 MW	216	log_message = Spam rejection \
	217	score=$spam_score \
	218	limit=$acl_m_spam_limit_presentation \
	219	tests=$acl_m_spam_tests \
	220	users=$acl_m_spam_users
3e96b3ae MW	221	condition = ${if >{$spam_score_int}{$acl_m_spam_limit} \
	222	{true}{false}}
	223
	224	## Insert headers from the spam check now that we've decided to
	225	## accept the message.
	226	warn
1e5ccd7c	227	ADD_HEADER(<:X-CONF_header_token-SpamAssassin-Score: \
185b5456	228	$spam_score/$acl_m_spam_limit_presentation \
1e5ccd7c MW	229	($spam_bar):>)
1e5ccd7c MW	230	ADD_HEADER(<:X-CONF_header_token-SpamAssassin-Status: \
185b5456 MW	231	score=$spam_score, \
185b5456 MW	232	limit=$acl_m_spam_limit_presentation, \n\t\
1e5ccd7c	233	tests=$acl_m_spam_tests:>)
185b5456	234
185b5456 MW	235	## We're good.
	236	accept
	237
	238	DIVERT(null)
	239	###----- That's all, folks --------------------------------------------------