try:
    import cStringIO as StringIO  # Python 2: C-accelerated implementation
except ImportError:
    import io as StringIO  # Python 3: the cStringIO module no longer exists
4 class BigDataStorage(object):
\r
6 The StringIO from python aborts with an out-of-memory error after 250MB.
\r
7 So the BigDataStorage stores data in multiple StringIOs to prevent this issue.
\r
10 self._active = StringIO.StringIO()
\r
11 self._list = [self._active]
\r
12 self._read_index = None
\r
14 def write(self, data):
\r
15 self._active.write(data)
\r
16 if self._active.tell() > 1024 * 1024 * 50:
\r
17 self._active = StringIO.StringIO()
\r
18 self._list.append(self._active)
\r
20 def seekStart(self):
\r
21 self._active = self._list[0]
\r
22 self._active.seek(0)
\r
23 self._read_index = 0
\r
25 def read(self, size=-1):
\r
26 ret = self._active.read(size)
\r
28 if self._read_index + 1 < len(self._list):
\r
29 self._read_index += 1
\r
30 self._active = self._list[self._read_index]
\r
31 self._active.seek(0)
\r
32 ret = self._active.read(size)
\r
35 def replaceAtStart(self, dictionary):
\r
36 data = self._list[0].getvalue()
\r
37 block0 = data[0:2048]
\r
38 block1 = StringIO.StringIO()
\r
39 self._list[0] = StringIO.StringIO()
\r
40 block1.write(data[2048:])
\r
41 self._list.insert(1, block1)
\r
42 for key, value in dictionary.items():
\r
43 block0 = block0.replace(key, str(value))
\r
44 self._list[0].write(block0)
\r
48 for data in self._list:
\r
56 self._iter_index = 0
\r
60 if self._iter_index < len(self._list):
\r
61 ret = self._list[self._iter_index].readline()
\r
62 if ret == '' or (ret[-1] != '\n' and ret[-1] != '\r'):
\r
63 self._iter_index += 1
\r
64 if self._iter_index < len(self._list):
\r
65 self._list[self._iter_index].seek(0)
\r
66 return ret + self.next()
\r
72 for data in self._list[:self._iter_index]:
\r
74 if self._iter_index < len(self._list):
\r
75 pos += self._list[self._iter_index].tell()
\r
82 clone = BigDataStorage()
\r
84 for item in self._list:
\r
85 clone._list.append(StringIO.StringIO(item.getvalue()))
\r
86 clone._active = clone._list[-1]
\r