chiark / gitweb /
adding cgiFiles to repo
[familyTree.git] / familyTree / stylesFettling.py
diff --git a/familyTree/stylesFettling.py b/familyTree/stylesFettling.py
new file mode 100755 (executable)
index 0000000..4af4b07
--- /dev/null
@@ -0,0 +1,151 @@
+#!/usr/bin/python
+
+import askQuestion as aQ
+import sys     
+import re
+import string
def add_quotes(s):
    """Return the string s wrapped in a pair of double-quote characters."""
    quote = '"'
    return quote + s + quote
+
+
# (full place name, abbreviation) pairs for names that themselves contain
# " of " or " and ".
# Order matters: substitutions are applied top to bottom, so a name (or
# abbreviation) that contains another listed one must come first, e.g.
# 'Church of England and of Ireland' before 'Church of England' and
# 'CEI' before 'CE'.
_PLACE_ABBREVIATIONS = (
    ('United Kingdom of Great Britain and Ireland', 'UKGBI'),
    ('United Kingdom of Great Britain and Northern Ireland', 'UKGBNI'),
    ('Church of England and of Ireland', 'CEI'),
    ('Church of England', 'CE'),
    ('Republic of the Seven United Netherlands', 'RSUN'),
    ('Realms and Territories', 'RAT'),
    ('Holy Roman Empire', 'HRE'),
)


def problem_places(style, tofrom):
    """Swap awkward place names in a style string with their abbreviations.

    Parameters:
        style:  the style text to rewrite.
        tofrom: 1 replaces every full place name with its abbreviation;
                any other value replaces every abbreviation with the
                full place name.

    Returns the rewritten string. Matching is literal and case-sensitive,
    and every occurrence is replaced.
    """
    abbreviate = (tofrom == 1)
    for full_name, short_form in _PLACE_ABBREVIATIONS:
        if abbreviate:
            style = style.replace(full_name, short_form)
        else:
            style = style.replace(short_form, full_name)
    return style
+
# aQ.connect() is called before any query is run; the returned handle is
# kept in conn but is not referenced again in this script.
conn = aQ.connect()

# Rebuild the styleDecomp table from scratch on every run.
try:
    s = 'DROP TABLE styleDecomp;'
    a = aQ.run_query(s, ())
except Exception:
    # Best effort: a failed DROP (presumably because the table does not
    # exist yet) must not stop the run. Only ordinary errors are
    # swallowed, so Ctrl-C and SystemExit still propagate.
    print('')


# Two text columns: the original style string and one decomposed short form.
s = 'CREATE TABLE styleDecomp\n(\nStyle text,\nShort text);'
a = aQ.run_query(s, ())
+
+
s = "SELECT DISTINCT style FROM styles ORDER BY style;"


# Matches either "<ordinal><suffix> <title> of <place>," or a bare "<place>,".
# Groups: 1 = ordinal suffix, 2 = title, 3 = place following " of ",
# 4 = bare place (second alternative).
myRE = '[1-9]*(th|rd|nd|st)* *([a-zA-Z !-]*?) of ([a-zA-Z-. ]*),?| *([a-zA-Z-. ]*),?'


def _split_style(style):
    """Split one style string into a list of raw short styles.

    style is expected to have had its awkward place names abbreviated
    already. Each returned entry is either '<title> of <place>', the
    literal 'Holy Roman Emperor', or (when the style contains no ' of ')
    the style itself with any ordinal such as '3rd ' removed.

    The most recently seen titles carry over to later places, so
    'King of X and Y' yields an entry for both X and Y.
    """
    short_styles = []
    titles = []
    for groups in re.findall(myRE, style):
        title_part, of_place, bare_place = groups[-3:]

        if title_part != '' and title_part != 'and':
            titles = re.split(' and ', title_part)

        if of_place != '':
            place = of_place
        elif bare_place != '':
            place = bare_place
        else:
            # Empty match: nothing to record.
            continue

        if place[0:3] == 'and':
            place = place[4:]

        if place == 'Holy Roman Emperor' or place == 'HRE':
            short_styles.append('Holy Roman Emperor')
            continue

        if not re.match('.* of .*', style):
            # No "of" anywhere: the whole style, minus its ordinal, is
            # the short form.
            style = re.sub('[1-9]+(th|rd|nd|st) ', '', style)
            short_styles.append(style)
            continue

        for part in re.split(' and ', place):
            nested = re.findall(myRE, part)[0]
            if nested[1] != '':
                # The fragment carries its own "<title> of <place>".
                titles = re.split(' and ', nested[1])
                part = nested[2]
            if part == 'etc':
                continue
            if part[-4:] == 'Head':
                # Drop the current titles: nothing is emitted for this
                # fragment or for later ones until a new title is found.
                titles = []
            if part[0:2] == 'of':
                suffix = ' ' + part
            else:
                suffix = ' of ' + part
            for one_title in titles:
                short_styles.append(one_title + suffix)
    return short_styles


def _fill_missing_titles(short_styles):
    """Expand place abbreviations and resolve '!' title placeholders.

    Entries starting with '!' came from a style that began with ' of',
    i.e. had no leading title. The last word of the last such entry is
    taken as the title: it is cut off that entry and substituted for '!'
    in every placeholder entry. The list is modified in place and
    returned.
    """
    missing = []
    for idx, entry in enumerate(short_styles):
        entry = problem_places(entry, 2)
        if entry.startswith('!'):
            missing.append(idx)
        if entry[0:4] == 'and ':
            entry = entry[4:]
        short_styles[idx] = entry

    if missing:
        last_idx = missing[-1]
        # Trailing spaces are removed first; otherwise the "last word" is
        # empty and the [:-len(title)] slice below would wipe the entry.
        last = short_styles[last_idx].rstrip(' ')
        title = last.split(' ')[-1]
        short_styles[last_idx] = last[:-len(title)]
        for idx in missing:
            short_styles[idx] = short_styles[idx].replace('!', title)
    return short_styles


for r in aQ.run_query(s, ()):
    style = r[0]
    if not style:
        # NULL or empty style: nothing to decompose.
        continue

    style = problem_places(style, 1)

    style = style.replace('By the Grace of God,', '')
    style = style.replace('By the Grace of God', '')

    if style[0:3] == ' of':
        # No leading title: mark the gap so it can be filled in later.
        style = '!' + style

    for short in _fill_missing_titles(_split_style(style)):
        short = short.strip(' ')
        if not short:
            # Skip blank entries rather than inserting an empty row.
            continue
        aQ.make_insert('styleDecomp', [r[0], short])


aQ.commit_changes()
aQ.close()