--- /dev/null
+#!/usr/bin/python
+import askQuestion as aQ
+import englishUtils as eU
+from string import Template
+import re
+import urllib2
+import json
+import codecs
+import pickle
+
def rem_space(text):
    """Delete every space character from *text*.

    NOTE(review): this strips interior spaces as well as leading/trailing
    ones (e.g. 'John Smith' -> 'JohnSmith') -- confirm that is intended
    for infobox values, not just field names.
    """
    return text.replace(' ', '')
def print_utf(text):
    # Print *text* to stdout encoded as UTF-8 bytes (Python 2: avoids
    # UnicodeEncodeError when the terminal encoding can't represent it).
    print codecs.encode(text,'utf-8')
+
def split_info(info):
    """Split a wiki infobox body into its '| field = value' pieces.

    A '|' separates fields only when it is not nested inside [[...]] or
    {{...}} -- wiki links and templates may themselves contain '|' (and
    '=') -- so bracket depth is tracked instead of using str.split.

    Returns the list of substrings between top-level '|' characters,
    including the leading piece before the first '|'.
    """
    sq_depth = 0   # depth inside [ ... ] (counted per single bracket char)
    cur_depth = 0  # depth inside { ... }
    breaks = []
    for i, ch in enumerate(info):
        if ch == '|' and sq_depth == 0 and cur_depth == 0:
            breaks.append(i)
        elif ch == '[':
            sq_depth += 1
        elif ch == ']':
            sq_depth -= 1
        elif ch == '{':
            cur_depth += 1
        elif ch == '}':
            cur_depth -= 1

    # Bug fix: the original appended -1 as the final break, so the last
    # piece was sliced as info[s:-1] and silently lost its final character.
    breaks.append(len(info))

    out = []
    start = 0
    for b in breaks:
        out.append(info[start:b])
        start = b + 1
    return out
+
def check():
    """Eyeball-compare parsed infobox fields against the SQL database.

    Loads the dict list pickled by understand() (index aligned with the
    people.id column) and, for a sample of ids, prints the DB 'born'
    value next to the infobox 'birth_date' so a human can verify the
    parse.  The remaining comparisons (death, parents, spouses,
    children) are kept commented out below for later use.
    """
    file = open(filename2)
    dicts = pickle.load(file)
    file.close()

    # Person facts by id.
    s = 'SELECT url,name,born,died FROM people WHERE id=?'
    # Spouses of ?: a marriage row may hold the id in either column,
    # hence the UNION over both directions.
    u = 'SELECT url,name from marriages INNER JOIN people'\
    +' ON people.id = marriages.idb'\
    +' WHERE marriages.ida=?'\
    +' UNION'\
    +' SELECT url,name from marriages INNER JOIN people'\
    +' ON people.id = marriages.ida'\
    +' WHERE marriages.idb = ?'
    # Parents of ?.
    v = 'SELECT url,name from parents INNER JOIN people'\
    +' ON people.id = parentID'\
    +' WHERE parents.id = ?'
    # Children of ?.
    c = 'SELECT url,name from parents INNER JOIN people'\
    +' ON people.id = parents.ID'\
    +' WHERE parents.parentID = ?'

# for id in range(1,len(dicts)):
    # Small sample while debugging; swap in the commented full-range
    # loop above to check everyone.
    for id in range(1,3):
        t = (id,)

        for r in aQ.run_query(s,t):
            url = r[0]
            name = r[1]
            born = r[2]
            died = r[3]
            spurls=[]
            sp=[]
            for r in aQ.run_query(u,(id,id)):
                spurls.append(r[0])
                sp.append(r[1])
            # '.' URLs mark people without a wiki page; skip them.
            # NOTE(review): the skip happens *after* the spouse query
            # already ran -- presumably harmless, but confirm.
            if url=='.':
                continue

            purls=[]
            ps=[]
            for r in aQ.run_query(v,t):
                purls.append(r[0])
                ps.append(r[1])

            curls = []
            cs=[]
            for r in aQ.run_query(c,t):
                curls.append(r[0])
                cs.append(r[1])

            struct = dicts[id]

            print 'born'
            print born
            # has_key is the Python 2 spelling of "'birth_date' in struct".
            if struct.has_key('birth_date'):
                print_utf(struct['birth_date'])
#            print 'died'
#            print died
#            if struct.has_key('death_date'):
#                print_utf(struct['death_date'])
#            print 'parents'
#            print ps
#            print purls
#            if struct.has_key('father'):
#                print_utf(struct['father'])
#            if struct.has_key('mother'):
#                print_utf(struct['mother'])
#            print 'spouses'
#            print sp
#            print spurls
#            if struct.has_key('spouse'):
#                print_utf(struct['spouse'])
#            elif struct.has_key('spouses'):
#                print_utf(struct['spouses'])
#            print 'children'
#            print cs
#            if struct.has_key('issue'):
#                print_utf(struct['issue'])
+
def understand():
    """Parse each raw infobox blob (pickled by get()) into a field dict.

    Reads the list pickled under the module-global *filename*, splits
    every entry on top-level '|' separators via split_info(), and builds
    one {field: value} dict per person (spaces removed by rem_space,
    newlines collapsed to spaces).  The resulting list is pickled under
    *filename2*.  Index 0 holds a placeholder so list indices line up
    with the 1-based people.id values.
    """
    # 'with' guarantees the handle is closed even if parsing raises,
    # and avoids shadowing the builtin name 'file' (both defects in the
    # original).  The unused per-id tuple t = (id,) was dropped.
    with open(filename) as src:
        data = pickle.load(src)

    dicts = ['None']  # index 0 placeholder; real entries start at id 1
    for id in range(1, len(data)):
        struct = {}
        for bit in split_info(data[id]):
            line = bit.split('=', 1)
            if len(line) < 2:
                continue  # not a 'field = value' pair (e.g. template name)
            field = rem_space(line[0])
            value = rem_space(line[1])
            value = re.sub('\n', ' ', value)
            struct[field] = value
        dicts.append(struct)

    with open(filename2, 'w') as dst:
        pickle.dump(dicts, dst)
+
+
def get():
    """Fetch the raw infobox wikitext for every person in the database.

    For each people row with a usable URL, queries the MediaWiki API for
    the page's current revision wikitext, extracts the balanced
    {{Infobox ... }} template, and appends its inner text to a list
    (index-aligned with people.id; 'None' when no infobox is found).
    The list is pickled under the module-global *filename*.
    """
    s = 'SELECT url,name,born,died FROM people WHERE id=?'



    data = ['blank']  # index 0 placeholder so data[id] matches people.id
    for id in range(1,aQ.number_people()):
        t = (id,)
        for r in aQ.run_query(s,t):
            url = r[0]
            # Skip empty URLs and '.'-prefixed ones (no wiki page).
            if len(url)==0 or len(url)>0 and url[0]=='.':
                continue

            # Page title is the last path component of the stored URL.
            title = url.split('/')[-1]
            print_utf(title)
            # MediaWiki API: JSON, latest revision content, follow redirects.
            url = 'http://en.wikipedia.org/w/api.php?'\
            +'format=json&action=query'\
            +'&titles='+title\
            +'&prop=revisions&rvprop=content&redirects'

            r = urllib2.urlopen(url)
            t = r.read()
            jd = json.JSONDecoder()
            struct = jd.decode(t)
            pages = struct['query']['pages'].keys()

            startPatt = re.compile('{{',re.DOTALL)
            endPatt = re.compile('}}',re.DOTALL)
            infoboxPatt = re.compile('{{Infobox',re.DOTALL)
            for p in pages:
                title = struct['query']['pages'][p]['title']

                try:
                    # '*' holds the revision's raw wikitext.
                    page = struct['query']['pages'][p]['revisions'][0]['*']
                except:
                    # NOTE(review): bare except silently maps any failure
                    # (missing page, API change, KeyboardInterrupt!) to
                    # 'None' -- consider narrowing to KeyError/IndexError.
                    data.append('None')
                    continue
                iBox = re.search(infoboxPatt,page)
                starts = re.finditer(startPatt,page)
                ends = re.finditer(endPatt,page)

                if iBox==None:
                    data.append('None')
                    continue

                myStart = iBox.start()

                # Walk the '{{' and '}}' streams to find the '}}' that
                # balances the infobox's opening '{{' (templates nest).
                countMe = 0
                start = -1
                while start<myStart:
                    start = starts.next().start()
                end = -1
                while end<myStart:
                    end = ends.next().start()
                while 1==1:
                    if start<end:
                        countMe+=1
                        start = starts.next().start()
                    elif end<start:
                        countMe-=1
                        myEnd = end
                        end = ends.next().start()
                    if countMe==0:
                        break

                # Inner text: after the opening '{{', before the
                # balancing '}}'.
                info = page[myStart+2:myEnd]
                data.append(info)

    file = open(filename,'w')
    pickle.dump(data,file)
    file.close()
+
+
# Script entry sequence: open the DB, run the (un-commented) pipeline
# stages, close the DB.  The original 'global filename' statements were
# dropped: 'global' at module scope is a no-op -- these assignments
# already create module-level globals read by get()/understand()/check().
aQ.connect()

filename = 'wikiData'    # pickled list of raw infobox blobs (written by get)
filename2 = 'wikiDicts'  # pickled list of parsed field dicts (written by understand)
#get()
understand()
check()

aQ.close()
+
+