chiark / gitweb /
make-secnet-sites: Taint the `group' parameter
[secnet.git] / make-secnet-sites
index f32267824cbd954177416cb9bcfac3c9582e73f6..668dbf635b02ec9e98533e5e8b261297614074fb 100755 (executable)
@@ -1,4 +1,4 @@
-#! /usr/bin/env python
+#! /usr/bin/env python3
 #
 # This file is part of secnet.
 # See README for full list of copyright holders.
@@ -46,12 +46,12 @@ no-suppress-args
 cd ~/secnet/sites-test/
 execute ~/secnet/make-secnet-sites.py -u vpnheader groupfiles sites
 
-This program is part of secnet. It relies on the "ipaddr" library from
-Cendio Systems AB.
+This program is part of secnet.
 
 """
 
 from __future__ import print_function
+from __future__ import unicode_literals
 from builtins import int
 
 import string
@@ -60,8 +60,10 @@ import sys
 import os
 import getopt
 import re
+import argparse
+import math
 
-import ipaddr
+import ipaddress
 
 # entry 0 is "near the executable", or maybe from PYTHONPATH=.,
 # which we don't want to preempt
@@ -71,24 +73,162 @@ import ipaddrset
 
 VERSION="0.1.18"
 
-# Are we being invoked from userv?
-service=0
-# If we are, which group does the caller want to modify?
-group=None
-
-if len(sys.argv)<2:
-       inputfile=None
-       of=sys.stdout
-else:
-       if sys.argv[1]=='-u':
-               if len(sys.argv)!=6:
+from sys import version_info
+if version_info.major == 2:  # for python2
+    import codecs
+    sys.stdin = codecs.getreader('utf-8')(sys.stdin)
+    sys.stdout = codecs.getwriter('utf-8')(sys.stdout)
+    import io
+    open=lambda f,m='r': io.open(f,m,encoding='utf-8')
+
+max={'rsa_bits':8200,'name':33,'dh_bits':8200}
+
+class Tainted:
+       """An untrusted word of input, which must be checked before use.
+
+       _ok is None until a check has run; afterwards it is True
+       (accepted) or False (rejected).  The checking methods (name,
+       keyword, bignum_16, bignum_10, number, host, email, groupname)
+       return the validated value, or a substitute (by default the
+       empty string) once the value has been rejected.  output()
+       returns the value if accepted, '' if rejected, and exits with
+       an error if the value was never checked.  Formatting a Tainted
+       with %s is deliberately a RuntimeError.
+       """
+       def __init__(self,s,tline=None,tfile=None):
+               # tline/tfile default to the globals line and file
+               # (presumably the input position being parsed - confirm)
+               self._s=s
+               self._ok=None
+               self._line=line if tline is None else tline
+               self._file=file if tfile is None else tfile
+       def __eq__(self,e):
+               return self._s==e
+       def __ne__(self,e):
+               # for Python2
+               return not self.__eq__(e)
+       def __str__(self):
+               raise RuntimeError('direct use of Tainted value')
+       def __repr__(self):
+               return 'Tainted(%s)' % repr(self._s)
+
+       def _bad(self,what,why):
+               # Record the rejection and complain.  Returns False so
+               # that callers can write ok=self._bad(...); returning
+               # self here would be truthy and defeat the check.
+               assert(self._ok is not True)
+               self._ok=False
+               complain('bad parameter: %s: %s' % (what, why))
+               return False
+
+       def _max_ok(self,what,maxlen):
+               if len(self._s) > maxlen:
+                       self._bad(what,'too long (max %d)' % maxlen)
+               return self
+
+       def _re_ok(self,bad,what,maxlen=None):
+               # True iff the value is at most maxlen long and the
+               # regexp bad matches nowhere in it.  This must be a real
+               # boolean: a Tainted instance is always truthy.
+               if maxlen is None: maxlen=max[what]
+               self._max_ok(what,maxlen)
+               if self._ok is False: return False
+               if bad.search(self._s): return self._bad(what,'bad syntax')
+               return True
+
+       def _rtnval(self, is_ok, ifgood, ifbad=''):
+               # Record the verdict and return the matching value
+               if is_ok:
+                       assert(self._ok is not False)
+                       self._ok=True
+                       return ifgood
+               else:
+                       assert(self._ok is not True)
+                       self._ok=False
+                       return ifbad
+
+       def _rtn(self, is_ok, ifbad=''):
+               return self._rtnval(is_ok, self._s, ifbad)
+
+       def raw(self):
+               # the unchecked string; does not change _ok
+               return self._s
+       def raw_mark_ok(self):
+               # caller promises to throw if syntax was dangerous
+               return self._rtn(True)
+
+       def output(self):
+               if self._ok is False: return ''
+               if self._ok is True: return self._s
+               print('%s:%d: unchecked/unknown additional data "%s"' %
+                     (self._file,self._line,self._s),
+                     file=sys.stderr)
+               sys.exit(1)
+
+       bad_name=re.compile(r'^[^a-zA-Z]|[^-_0-9a-zA-Z]')
+       # secnet accepts _ at start of names, but we reserve that
+       bad_name_counter=0
+       def name(self):
+               # a rejected name is replaced by a generated _line... one
+               ok=self._re_ok(Tainted.bad_name,'name')
+               return self._rtn(ok,
+                                '_line%d_%s' % (self._line, id(self)))
+
+       def keyword(self):
+               ok=self._s in keywords or self._s in levels
+               if not ok:
+                       complain('unknown keyword %s' % self._s)
+               return self._rtn(ok)
+
+       bad_hex=re.compile(r'[^0-9a-fA-F]')
+       def bignum_16(self,kind,what):
+               # one hex digit per 4 bits, rounded up; // keeps this an
+               # integer on Python 3 as well as Python 2
+               maxlen=(max[kind+'_bits']+3)//4
+               ok=self._re_ok(Tainted.bad_hex,what,maxlen)
+               return self._rtn(ok)
+
+       bad_num=re.compile(r'[^0-9]')
+       def bignum_10(self,kind,what):
+               # An N-bit number has at most ceil(N*log10(2)) decimal
+               # digits (multiply, not divide).
+               maxlen=int(math.ceil(max[kind+'_bits'] * math.log10(2)))
+               ok=self._re_ok(Tainted.bad_num,what,maxlen)
+               return self._rtn(ok)
+
+       def number(self,minn,maxx,what='number'):
+               # not for bignums
+               # returns the int value, or minn if it was rejected
+               ok=self._re_ok(Tainted.bad_num,what,10)
+               v=minn # placeholder so v is bound if the syntax is bad
+               if ok:
+                       v=int(self._s)
+                       if v<minn or v>maxx:
+                               ok=self._bad(what,'out of range %d..%d'
+                                            % (minn,maxx))
+               return self._rtnval(ok,v,minn)
+
+       bad_host=re.compile(r'[^-\][_.:0-9a-zA-Z]')
+       # We permit _ so we can refer to special non-host domains
+       # which have A and AAAA RRs.  This is a crude check and we may
+       # still produce config files with syntactically invalid
+       # domains or addresses, but that is OK.
+       def host(self):
+               ok=self._re_ok(Tainted.bad_host,'host/address',255)
+               return self._rtn(ok)
+
+       bad_email=re.compile(r'[^-._0-9a-z@!$%^&*=+~/]')
+       # ^ This does not accept all valid email addresses.  That's
+       # not really possible with this input syntax.  It accepts
+       # all ones that don't require quoting anywhere in email
+       # protocols (and also accepts some invalid ones).
+       def email(self):
+               ok=self._re_ok(Tainted.bad_email,'email address',1023)
+               return self._rtn(ok)
+
+       bad_groupname=re.compile(r'^[^_A-Za-z]|[^-+_0-9A-Za-z]')
+       def groupname(self):
+               ok=self._re_ok(Tainted.bad_groupname,'group name',64)
+               return self._rtn(ok)
+
+def parse_args():
+       global service
+       global inputfile
+       global header
+       global groupfiledir
+       global sitesfile
+       global group
+       global user
+       global of
+
+       ap = argparse.ArgumentParser(description='process secnet sites files')
+       ap.add_argument('--userv', '-u', action='store_true',
+                       help='userv service fragment update mode')
+       ap.add_argument('--prefix', '-P', nargs=1,
+                       help='set prefix')
+       ap.add_argument('arg',nargs=argparse.REMAINDER)
+       av = ap.parse_args()
+       #print(repr(av), file=sys.stderr)
+       service = 1 if av.userv else 0
+       if service:
+               if len(av.arg)!=4:
                        print("Wrong number of arguments")
                        sys.exit(1)
-               service=1
-               header=sys.argv[2]
-               groupfiledir=sys.argv[3]
-               sitesfile=sys.argv[4]
-               group=sys.argv[5]
+               (header, groupfiledir, sitesfile, group) = av.arg
+               group = Tainted(group,0,'command line')
+               # untrusted argument from caller
                if "USERV_USER" not in os.environ:
                        print("Environment variable USERV_USER not found")
                        sys.exit(1)
@@ -105,16 +245,14 @@ else:
-                       print("caller not in group %s"%group)
+                       # group is Tainted; formatting it with %s raises
+                       # RuntimeError, so print the raw string instead
+                       print("caller not in group %s"%group.raw())
                        sys.exit(1)
        else:
-               if sys.argv[1]=='-P':
-                       prefix=sys.argv[2]
-                       sys.argv[1:3]=[]
-               if len(sys.argv)>3:
+               # Non-userv mode takes at most [inputfile [outputfile]].
+               # NOTE(review): av.prefix is not consulted in the lines
+               # shown here, unlike the old -P handling - confirm.
+               if len(av.arg)>2:
                        print("Too many arguments")
                        sys.exit(1)
-               inputfile=sys.argv[1]
-               of=sys.stdout
-               if len(sys.argv)>2:
-                       of=open(sys.argv[2],'w')
+               (inputfile, outputfile) = (av.arg + [None]*2)[0:2]
+               # Open the parsed name: sys.argv[2] is no longer the
+               # output file once argparse has consumed options like -P
+               if outputfile is None: of=sys.stdout
+               else: of=open(outputfile,'w')
+
+parse_args()
 
 # Classes describing possible datatypes in the configuration file
 
@@ -122,7 +260,7 @@ class basetype:
        "Common protocol for configuration types."
        def add(self,obj,w):
                complain("%s %s already has property %s defined"%
-                       (obj.type,obj.name,w[0]))
+                       (obj.type,obj.name,w[0].raw()))
 
 class conflist:
        "A list of some kind of configuration type."
@@ -139,7 +277,7 @@ def listof(subtype):
 class single_ipaddr (basetype):
        "An IP address"
        def __init__(self,w):
-               self.addr=ipaddr.IPAddress(w[1])
+               self.addr=ipaddress.ip_address(w[1].raw_mark_ok())
        def __str__(self):
                return '"%s"'%self.addr
 
@@ -148,7 +286,7 @@ class networks (basetype):
        def __init__(self,w):
                self.set=ipaddrset.IPAddressSet()
                for i in w[1:]:
-                       x=ipaddr.IPNetwork(i,strict=True)
+                       x=ipaddress.ip_network(i.raw_mark_ok(),strict=True)
                        self.set.append([x])
        def __str__(self):
                return ",".join(map((lambda n: '"%s"'%n), self.set.networks()))
@@ -156,34 +294,41 @@ class networks (basetype):
 class dhgroup (basetype):
        "A Diffie-Hellman group"
        def __init__(self,w):
-               self.mod=w[1]
-               self.gen=w[2]
+               self.mod=w[1].bignum_16('dh','dh mod')
+               self.gen=w[2].bignum_16('dh','dh gen')
        def __str__(self):
                return 'diffie-hellman("%s","%s")'%(self.mod,self.gen)
 
 class hash (basetype):
        "A choice of hash function"
        def __init__(self,w):
-               self.ht=w[1]
+               hname=w[1]
+               self.ht=hname.raw()
                if (self.ht!='md5' and self.ht!='sha1'):
                        complain("unknown hash type %s"%(self.ht))
+                       self.ht=None
+               else:
+                       hname.raw_mark_ok()
        def __str__(self):
                return '%s'%(self.ht)
 
 class email (basetype):
        "An email address"
        def __init__(self,w):
-               self.addr=w[1]
+               self.addr=w[1].email()
        def __str__(self):
                return '<%s>'%(self.addr)
 
 class boolean (basetype):
        "A boolean"
        def __init__(self,w):
-               if re.match('[TtYy1]',w[1]):
+               v=w[1]
+               if re.match('[TtYy1]',v.raw()):
                        self.b=True
-               elif re.match('[FfNn0]',w[1]):
+                       v.raw_mark_ok()
+               elif re.match('[FfNn0]',v.raw()):
                        self.b=False
+                       v.raw_mark_ok()
                else:
                        complain("invalid boolean value");
        def __str__(self):
@@ -192,26 +337,25 @@ class boolean (basetype):
 class num (basetype):
        "A decimal number"
        def __init__(self,w):
-               self.n=int(w[1])
+               self.n=w[1].number(0,0x7fffffff)
        def __str__(self):
                return '%d'%(self.n)
 
 class address (basetype):
        "A DNS name and UDP port number"
        def __init__(self,w):
-               self.adr=w[1]
-               self.port=int(w[2])
-               if (self.port<1 or self.port>65535):
-                       complain("invalid port number")
+               self.adr=w[1].host()
+               self.port=w[2].number(1,65536,'port')
        def __str__(self):
                return '"%s"; port %d'%(self.adr,self.port)
 
 class rsakey (basetype):
        "An RSA public key"
        def __init__(self,w):
-               self.l=int(w[1])
-               self.e=w[2]
-               self.n=w[3]
+               self.l=w[1].number(0,max['rsa_bits'],'rsa len')
+               self.e=w[2].bignum_10('rsa','rsa e')
+               self.n=w[3].bignum_10('rsa','rsa n')
+               if len(w) >= 5: w[4].email()
        def __str__(self):
                return 'rsa-public("%s","%s")'%(self.e,self.n)
 
@@ -257,7 +401,8 @@ class level:
        allow_properties={}
        require_properties={}
        def __init__(self,w):
-               self.name=w[1]
+               self.type=w[0].keyword()
+               self.name=w[1].name()
                self.properties={}
                self.children={}
        def indent(self,w,t):
@@ -315,7 +460,7 @@ class locationlevel(level):
        }
        def __init__(self,w):
                level.__init__(self,w)
-               self.group=w[2]
+               self.group=w[2].groupname()
        def output_vpnflat(self,w,ind,h):
                self.indent(w,ind)
                # The "h=h,self=self" abomination below exists because
@@ -384,29 +529,42 @@ def moan(msg):
        print(msg);
        complaints=complaints+1
 
-root=level(['root','root'])   # All vpns are children of this node
+class UntaintedRoot():
+       def __init__(self,s): self._s=s
+       def name(self): return self._s
+       def keyword(self): return self._s
+
+root=level([UntaintedRoot(x) for x in ['root','root']])
+# All vpns are children of this node
 obstack=[root]
 allow_defs=0   # Level above which new definitions are permitted
 prefix=''
 
 def set_property(obj,w):
        "Set a property on a configuration node"
-       if w[0] in obj.properties:
-               obj.properties[w[0]].add(obj,w)
+       prop=w[0]
+       if prop.raw() in obj.properties:
+               obj.properties[prop.raw_mark_ok()].add(obj,w)
        else:
-               obj.properties[w[0]]=keywords[w[0]][0](w)
+               obj.properties[prop.raw()]=keywords[prop.raw_mark_ok()][0](w)
 
-def pline(i,allow_include=False):
+
+def pline(il,allow_include=False):
        "Process a configuration file line"
        global allow_defs, obstack, root
-       w=i.rstrip('\n').split()
-       if len(w)==0: return [i]
+       w=il.rstrip('\n').split()
+       if len(w)==0: return ['']
+       w=list([Tainted(x) for x in w])
        keyword=w[0]
        current=obstack[len(obstack)-1]
+       copyout=lambda: ['    '*len(obstack) +
+                       ' '.join([ww.output() for ww in w]) +
+                       '\n']
        if keyword=='end-definitions':
+               keyword.raw_mark_ok()
                allow_defs=sitelevel.depth
                obstack=[root]
-               return [i]
+               return copyout()
        if keyword=='include':
                if not allow_include:
                        complain("include not permitted here")
@@ -414,11 +572,12 @@ def pline(i,allow_include=False):
                if len(w) != 2:
                        complain("include requires one argument")
                        return []
-               newfile=os.path.join(os.path.dirname(file),w[1])
+               newfile=os.path.join(os.path.dirname(file),w[1].raw_mark_ok())
+               # ^ user of "include" is trusted so raw_mark_ok is good
                return pfilepath(newfile,allow_include=allow_include)
-       if keyword in levels:
+       if keyword.raw() in levels:
                # We may go up any number of levels, but only down by one
-               newdepth=levels[keyword].depth
+               newdepth=levels[keyword.raw_mark_ok()].depth
                currentdepth=len(obstack) # actually +1...
                if newdepth<=currentdepth:
                        obstack=obstack[:newdepth]
@@ -428,37 +587,39 @@ def pline(i,allow_include=False):
                # See if it's a new one (and whether that's permitted)
                # or an existing one
                current=obstack[len(obstack)-1]
-               if w[1] in current.children:
+               tname=w[1].name()
+               if tname in current.children:
                        # Not new
-                       current=current.children[w[1]]
+                       current=current.children[tname]
                        if service and group and current.depth==2:
                                if group!=current.group:
                                        complain("Incorrect group!")
+                               w[2].groupname()
                else:
                        # New
                        # Ignore depth check for now
-                       nl=levels[keyword](w)
+                       nl=levels[keyword.raw()](w)
                        if nl.depth<allow_defs:
                                complain("New definitions not allowed at "
                                        "level %d"%nl.depth)
                                # we risk crashing if we continue
                                sys.exit(1)
-                       current.children[w[1]]=nl
+                       current.children[tname]=nl
                        current=nl
                obstack.append(current)
-               return [i]
-       if keyword not in current.allow_properties:
+               return copyout()
+       if keyword.raw() not in current.allow_properties:
                complain("Property %s not allowed at %s level"%
-                       (keyword,current.type))
+                       (keyword.raw(),current.type))
                return []
        elif current.depth == vpnlevel.depth < allow_defs:
                complain("Not allowed to set VPN properties here")
                return []
        else:
                set_property(current,w)
-               return [i]
+               return copyout()
 
-       complain("unknown keyword '%s'"%(keyword))
+       complain("unknown keyword '%s'"%(keyword.raw()))
 
 def pfilepath(pathname,allow_include=False):
        f=open(pathname)
@@ -517,7 +678,7 @@ def live(n):
        return 0
 def delempty(n):
        "Delete nodes that have no leafnode children"
-       for i in n.children.keys():
+       for i in list(n.children.keys()):
                delempty(n.children[i])
                if not live(n.children[i]):
                        del n.children[i]
@@ -568,18 +729,20 @@ if complaints>0:
        if complaints==1: print("There was 1 problem.")
        else: print("There were %d problems."%(complaints))
        sys.exit(1)
+complaints=None # arranges to crash if we complain later
 
 if service:
        # Put the user's input into their group file, and rebuild the main
        # sites file
-       f=open(groupfiledir+"/T"+group,'w')
+       f=open(groupfiledir+"/T"+group.groupname(),'w')
        f.write("# Section submitted by user %s, %s\n"%
                (user,time.asctime(time.localtime(time.time()))))
        f.write("# Checked by make-secnet-sites version %s\n\n"%VERSION)
        for i in userinput: f.write(i)
        f.write("\n")
        f.close()
-       os.rename(groupfiledir+"/T"+group,groupfiledir+"/R"+group)
+       os.rename(groupfiledir+"/T"+group.groupname(),
+                 groupfiledir+"/R"+group.groupname())
        f=open(sitesfile+"-tmp",'w')
        f.write("# sites file autogenerated by make-secnet-sites\n")
        f.write("# generated %s, invoked by %s\n"%