chiark / gitweb /
I'm sure I've done something since I remembered to commit...
[familyTree.git] / familyTree / text2SQL.py
1 #!/usr/bin/python
2 import findYear
3 import re
4
def add_quotes(s):
    """Return *s* wrapped in a pair of double-quote characters.

    No escaping is performed: a double quote already present inside *s*
    is copied through unchanged.
    """
    quote = '"'
    return quote + s + quote
7
def is_number(s):
    """Return *s* ready for use as an SQL value.

    If ``float(s)`` succeeds, *s* is returned unchanged (still a string,
    unquoted).  If it raises ``ValueError``, *s* is returned wrapped in
    double quotes via ``add_quotes``.
    """
    try:
        float(s)
    except ValueError:
        # Not parseable as a number: emit it as a quoted string instead.
        return add_quotes(s)
    else:
        return s
14
def make_insert(table, fields):
    """Build an ``INSERT INTO <table> VALUES(...);`` statement.

    Each item of *fields* is converted with ``str()`` and the results are
    joined with commas.  No quoting or escaping is applied here, so string
    values must already be quoted by the caller.

    An empty *fields* produces ``VALUES()``; the previous implementation
    unconditionally chopped the last character and so removed the opening
    parenthesis in that case.
    """
    values = ','.join(str(f) for f in fields)
    return 'INSERT INTO ' + table + ' VALUES(' + values + ');'
22
# Convert the plain-text family tree in the file 'tree' into SQL INSERT
# statements printed on stdout.
#
# The input is read as label/value pairs: a label line such as 'ID:' or
# 'Name:' is followed by its value on the next line, so each iteration
# inspects the *previous* line (lastline) to decide what the current line
# means.
#
# NOTE(review): per-record values (prens, postns, url, picture, thisTerr, ...)
# are only assigned when their label is seen.  A record that omits a label
# therefore reuses whatever the previous record left behind (for prens,
# postns and firstName that is the already-quoted form), or raises NameError
# if the label has never been seen.  Presumably every record in 'tree'
# carries every label -- confirm against the data.
lastline = ''
finishedRecord = 0  # set to 1 when the 'Died:' value is read; emits the people row
hasStyle = 0        # 1 from a 'Style:' value until the matching 'To:' value
terr = 0            # 1 while continuation lines of a 'Territories:' list are read

# 'with' guarantees the input file is closed, even if a line fails to parse.
with open('tree', 'r') as f:
    for line in f:
        # Strip the line terminator.  The old code removed exactly one
        # character and then tested for a trailing '\r\n', which could never
        # match any more; CRLF input kept its '\r' (so no label ever matched)
        # and a final line without a newline lost a real character.
        thisline = line.rstrip('\r\n')

        if lastline == 'ID:':
            thisID = thisline

        if lastline == 'Pre-name style:':
            prens = thisline

        if lastline == 'Post-name style:':
            postns = thisline

        if lastline == 'Name:':
            # Prefer the first run of capital letters that follows a space
            # (plus the one non-lowercase character after it), rewritten with
            # only its first letter in upper case.  Otherwise fall back to
            # the first whitespace-separated word, or '' for an empty line.
            a = re.search(' ([A-Z]+[^a-z])', thisline)
            names = thisline.split()

            if a is not None:
                name = a.group(1)
                firstName = name[0] + name[1:].lower()
            elif names:
                firstName = names[0]
            else:
                firstName = ''

            thisName = add_quotes(thisline)

        # findYear is a project module; presumably find_year / find_month
        # pull the year and month out of a free-form date string -- confirm
        # against findYear itself.
        if lastline == 'Born:':
            yb = findYear.find_year(thisline)
            mb = findYear.find_month(thisline)
            thisBorn = add_quotes(thisline)
        if lastline == 'Died:':
            yd = findYear.find_year(thisline)
            md = findYear.find_month(thisline)
            thisDied = add_quotes(thisline)
            # The 'Died:' value triggers the people row for this record.
            finishedRecord = 1
        if lastline == 'URL:':
            url = add_quotes(thisline)
        if lastline == 'Picture:':
            picture = add_quotes(thisline)

        # Father and mother are stored identically: one (child, parent) row
        # each.  Numeric values go in bare, anything else is quoted.
        if lastline == 'Father:' or lastline == 'Mother:':
            a = is_number(thisline)
            s = make_insert('parents', [thisID, a])
            # Single-argument print(...) prints the same on Python 2 and 3.
            print(s)

        if finishedRecord == 1:
            # A style of '.' means "no style".
            if prens != '.':
                pre = prens + ' '
            else:
                pre = ''
            if postns != '.':
                post = ' ' + postns
            else:
                post = ''

            # NOTE(review): pre already ends in a space and another one is
            # added here, so a pre-name style is followed by two spaces in
            # titleName.  Kept as-is so the generated rows do not change --
            # confirm whether a single space was intended.
            titleName = ''
            if pre != '':
                titleName += pre + ' '
            titleName += firstName
            if post != '':
                titleName += post
            titleName = add_quotes(titleName)

            prens = add_quotes(prens)
            postns = add_quotes(postns)
            firstName = add_quotes(firstName)
            s = make_insert('people',
                            [thisID, titleName, firstName, thisBorn, yb,
                             thisDied, yd, mb, md, url, picture, prens,
                             postns, thisName])
            print(s)
            finishedRecord = 0

        if lastline == 'Style:':
            thisStyle = add_quotes(thisline)
            hasStyle = 1

        # Continuation lines of a territory list run until a blank line.
        # This check sits before the 'Territories:' check so the first
        # territory is not appended twice.
        if terr == 1:
            if thisline == '':
                terr = 0
            else:
                thisTerr.append(add_quotes(thisline))
        if lastline == 'Territories:':
            thisTerr = [add_quotes(thisline)]
            terr = 1

        if hasStyle == 1:
            if lastline == 'From:':
                yf = findYear.find_year(thisline)
                thisFrom = add_quotes(thisline)
            if lastline == 'To:':
                yt = findYear.find_year(thisline)
                thisTo = add_quotes(thisline)
                s = make_insert('styles',
                                [thisID, thisStyle, thisFrom, yf, thisTo, yt])
                print(s)

                for territory in thisTerr:
                    s = make_insert('territories',
                                    [thisID, territory, thisFrom, yf,
                                     thisTo, yt])
                    print(s)

                # The old loop used `terr` itself as the loop variable, which
                # left the flag holding a string (never == 1) afterwards.
                # Reset it explicitly to keep that effect without the
                # shadowing.
                terr = 0
                hasStyle = 0

        lastline = thisline
136
137