chiark / gitweb /
adding cgiFiles to repo
[familyTree.git] / familyTree / csv2sql.py
diff --git a/familyTree/csv2sql.py b/familyTree/csv2sql.py
new file mode 100755 (executable)
index 0000000..df8fedb
--- /dev/null
@@ -0,0 +1,265 @@
+#!/usr/bin/env python
+
+import findYear
+import re
+import askQuestion as aQ
+
+def is_int(n):
+       try:
+               a = int(n)
+       except:
+               a = 0
+       return a
+
+def add_quotes(s):
+        return '\"'+s+'\"'
+
+
+def headline_style(Pre,Post,firstName,fam):
+       Post = re.sub('GB','Great Britain',Post)
+       Post = re.sub('UK',' the United Kingdom',Post)
+       Post = re.sub('the the','the',Post)
+
+       if fam=='.':
+               fam=''
+
+#      romanNumber = re.findall('^([IVXLCDM]*)( |$)',Post)
+       romanNumber = re.findall('^([IVXLCDM]*)( |,|$)',Post)
+       if len(romanNumber)>0:
+               Post = re.sub(romanNumber[0][0],'',Post,1)
+               romanNumber = ' '+romanNumber[0][0]
+
+       else:
+               romanNumber=''
+       Post = ' '+Post
+
+#      print romanNumber
+       if Post=='' and Pre =='':
+               return firstName
+
+       if Pre=='':
+               return firstName+romanNumber+Post
+
+       if re.match('(King|Queen|Pr|Grand).*',Pre):
+               return Pre+' '+firstName+romanNumber+Post
+
+       if re.match('.*Emperor.*',Pre):
+               if re.match('.*(Holy|HRE).*',Post) or Post ==romanNumber:
+                       return firstName + romanNumber +',Holy Roman Emperor'
+               else:
+                       return Pre+' '+firstName+romanNumber+Post
+
+
+       if re.match('^(St)',Pre):
+               return Pre+' '+firstName+' '+Post
+       if re.match('^(Sir|Lady|Captain|Pope)',Pre):
+               return Pre+' '+firstName+' '+Post
+       if romanNumber is None:
+               return firstName + ' '+fam+', '+Pre + ' '+Post
+
+       else:
+               return firstName+' '+fam+' '+romanNumber+', '+Pre+' '+Post
+
+
+       return  Pre + ' ' + firstName+romanNumber +' '+ Post
+
+       
+
+
+
+
+csvFile = 'Family Tree.csv'
+#dbFile = 'tree.db'
+conn = aQ.connect()
+
+csv = open(csvFile,'r')
+
+row = csv.readline()
+headings = row.split(';')
+IDCol = headings.index('ID')
+PreCol = headings.index('Pre-name style')
+NameCol = headings.index('Name')
+PostCol = headings.index('Post-name style')
+DisplayCol = headings.index('Display')
+URLCol = headings.index('URL')
+PictureCol = headings.index('Picture')
+BornCol = headings.index('Born')
+DiedCol = headings.index('Died')
+FatherCol = headings.index('Father')
+MotherCol = headings.index('Mother')
+SpousesCol = headings.index('Spouses')
+MarriedCol = headings.index('Married')
+EndedCol = headings.index('Ended')
+StyleStartCol = headings.index('Style')
+FamCol = headings.index('Family')
+
+for line in csv:
+       record = line.split(';')
+#      print record
+       ID = record[IDCol]
+#      print ID
+       Pre = record[PreCol]
+       if Pre=='.':
+               Pre = ''
+
+       Post = record[PostCol]
+       if Post=='.':
+               Post = ''
+       Post = re.sub('HRE','Holy Roman Emperor',Post)  
+       
+       Name = record[NameCol]
+       if Name == '':
+               continue
+       a = re.search(' ([A-Z]+[^a-z\.])( |$)',Name)
+       b = re.search('^([A-Za-z]* of [A-Za-z]*)($| )',Name)
+       names = Name.split(' ')
+       if a != None:
+               firstName = a.group(1)
+               firstName = firstName[0]+firstName[1:].lower()
+       elif b!=None:
+               firstName = b.group(1)
+       else:
+               firstName = names[0]
+
+       Family = record[FamCol].split(',')
+
+
+#      print ID
+
+       if record[DisplayCol]=='.':
+               titleName = headline_style(Pre,Post,firstName,Family[0])
+       else:
+               titleName = record[DisplayCol]
+
+#      print titleName
+       for name in names:
+               if name==' ' or name =='':
+                       continue
+               a = re.search('([A-Z]+[^a-z\.]*)$',name)
+               if a != None:
+                       name = name[0]+name[1:].lower()
+               aQ.make_insert('names',[ID,name])       
+
+
+       URL = record[URLCol]
+       URL = re.sub('http://','https://',URL)
+
+       Picture = record[PictureCol]
+
+       Born = record[BornCol]
+       yearBorn = findYear.find_year(Born)
+       monthBorn = findYear.find_month(Born)
+       bornDay = findYear.find_day(Born)
+
+       Died = record[DiedCol]
+       yearDied = findYear.find_year(Died)
+       monthDied = findYear.find_month(Died)
+       diedDay = findYear.find_day(Died)
+
+       Father = record[FatherCol]
+       aQ.make_insert('parents',[ID, Father])
+
+       Mother = record[MotherCol]
+       aQ.make_insert('parents',[ID, Mother])
+
+
+       
+       aQ.make_insert('people',\
+                [ID,titleName,firstName,Born,yearBorn,\
+                Died,yearDied,monthBorn,monthDied,\
+               URL,Picture,Pre,Post,Name,bornDay,diedDay])
+
+
+       for f in Family:
+               if f !='.':
+                       while re.search('^ ',f):
+                               f = f[1:]
+                       aQ.make_insert('families',[ID,f])       
+
+       Spouses = record[SpousesCol].split(',')
+       Married = record[MarriedCol].split(',')
+       Ended = record[EndedCol].split(',')
+
+       for i in range(len(Spouses)):
+               spouse = Spouses[i]
+               if len(Married)>i:
+                       start = Married[i]
+                       startYear = findYear.find_year(start)
+               else:
+                       start = '.'
+                       startYear = 0
+               if len(Ended)>i:
+                       end = Ended[i]
+               else:
+                       end = '.'
+
+               startDate = findYear.reverse_order(start)
+               endDate = findYear.reverse_order(end)
+               if is_int(ID)<is_int(spouse) or \
+               (is_int(spouse)==0 and spouse!='.' and spouse!=''):
+                       aQ.make_insert('marriages',[ID,spouse,start,end,\
+                               startYear,startDate,endDate,'0'])
+
+               elif is_int(ID)>is_int(spouse) and is_int(spouse)!=0:
+                       s = "SELECT idb from marriages"\
+                               +" WHERE ida == ?;"
+                       t = (is_int(spouse),)
+
+                       got = []
+                       for r in aQ.run_query(s,t):
+                               got.append(is_int(r[0]))
+
+                       if is_int(ID) not in got:
+                               aQ.make_insert('marriages',[spouse,ID,start,end,\
+                                startYear,startDate,endDate,'0'])
+
+
+       i = StyleStartCol
+       while i<len(record):
+               if record[i] == 'Consort':
+                       break
+               if record[i] == '':
+                       break
+               
+               Style = record[i]
+               Style = re.sub('HRE','Holy Roman Emperor',Style)
+               Start = record[i+2]
+               End = record[i+3]
+
+               yearStart = findYear.find_year(Start)
+               startDate = findYear.reverse_order(Start)
+               yearEnd = findYear.find_year(End)
+               endDate = findYear.reverse_order(End)
+
+               
+               aQ.make_insert('styles',[ID,Style,Start,\
+                       yearStart,End,yearEnd,startDate,endDate])
+
+               i = i+4
+
+#      titles = ['Duke','Count','Elector','Countess','Duchess']
+       if i==StyleStartCol: #no styles
+#              if Pre in titles:
+               if re.search('(^| )of ',Post) and Pre != '':
+                       x = re.match('(.*) (of .*)',Post)
+                       Roman = '[IVXLDCM]*'
+                       if Pre=='St':
+                               Style = 'Saint'
+                       else:
+                               if x:
+                                       Post = x.group(2)
+                                       if not re.match(Roman,x.group(1)):
+                                               Post = x.group(1)+' '+Post
+                               Style = Pre +' '+Post
+                       Start = '?'
+                       End = '?'
+
+
+
+                       aQ.make_insert('styles',[ID,Style,Start,\
+                               0,End,0,0,0])
+        
+
+aQ.commit_changes()
+       
+aQ.close()