read.py
Go to the documentation of this file.
1#!/usr/bin/env python
2from struct import unpack, calcsize
3import numpy as np
4import os
5import glob
6import mmap
7from collections import OrderedDict
8# backport of concurrent.futures in PyPI package "futures"
9#from concurrent import futures
10import itertools
11import functools
12
13
14"""@package read
15This package provides a wrapper for the binary and xdmf output.
16
17Use the `read` class to load the data stored in one or more output files.
18"""
19
20
class read(object):
    """Frame-wise, dict-like reader for a series of binary output files.

    The header of the first file is parsed once to learn the name, element
    type and byte offset of every data set; all files of the series are
    assumed to share that layout.  One file ("frame") is selected at a time
    and its data sets are available via ``reader[key]``.
    """

    def __init__(self, filename, usecache=False, mode='r'):
        """Collect the files of the series and select the first frame.

        Args:
            filename: Either a glob pattern or a list/tuple of file names.
            usecache: If true, frames are opened in the background through
                the ``cache`` class instead of being mapped on demand.
            mode: Mode handed to ``numpy.memmap`` when a frame is opened
                without the cache.

        Raises:
            IOError: If no file is given/matched or an entry is not a file.
        """
        if isinstance(filename, (tuple, list)):
            self.filenames = list(filename)
            if not self.filenames:
                raise IOError("No file given!")
        else:
            self.filenames = glob.glob(filename)
            if not self.filenames:
                raise IOError("No file found, invalid pattern '%s'" % filename)
        self.filenames.sort()
        for path in self.filenames:
            if not os.path.isfile(path):
                raise IOError("Not a file/missing: '%s'" % path)
        self.usecache = usecache
        self.mode = mode

        # Number of available frames.
        self.N = len(self.filenames)

        self._ReadHeader()
        if self.usecache:
            self.cache_ahead = 16
            # NOTE(review): the assignments of ``cache_growth_rate`` and
            # ``files`` were missing from the listing although ``select`` and
            # ``_load`` read them.  The values below are reconstructed from
            # how they are used - confirm against the upstream source.
            self.cache_growth_rate = 4
            self.cache_size = self.cache_ahead + 2
            self._resetStats()
            self.files = cache(self.cache_size)

        self.frame = 0
        self.select(self.frame)

    def _resetStats(self):
        """Reset the per-key access counters that drive preloading."""
        self.stats = dict.fromkeys(self.keys(), 0)

    def __iter__(self):
        """Restart iteration at the first frame."""
        self.index = 0
        self.select(self.index)
        return self

    def __next__(self):
        """Select the next frame and return the reader itself."""
        if self.index >= self.N:
            raise StopIteration
        self.select(self.index)
        self.index += 1
        return self

    def _load(self, n):
        """Schedule frame ``n`` for loading through the cache.

        Returns:
            True if ``n`` is out of range or a load was scheduled, False if
            the frame is already present in the cache.
        """
        if not 0 <= n < self.N:
            return True
        if n in self.files:
            return False
        # Only the keys requested since the last ``select`` are preloaded.
        preload = [key for key, hits in self.stats.items() if hits > 0]
        self.files[n] = lambda: memopen(
            self.filenames[n], self.dtype, preload)
        return True

    def select(self, n):
        """Make frame ``n`` the current one.

        Negative ``n`` counts from the end; the result is clamped to the
        valid range.  Without cache the file is mapped immediately.  With
        cache the frame is scheduled first, then up to ``cache_growth_rate``
        further frames (at most ``cache_ahead`` away, in the direction of
        travel) are scheduled as well.
        """
        oldframe = self.frame
        if n < 0:
            n += self.N
        self.frame = max(min(n, self.N - 1), 0)

        if not self.usecache:
            self.data = self._openFile(self.frame)
            return

        # Schedule the requested frame before any prefetching.
        self._load(self.frame)
        # The previous implementation derived the step from the already
        # clamped frame, which is 0 for every valid ``n`` and could spin
        # forever; use the direction of travel instead.
        step = -1 if self.frame < oldframe else 1
        loaded = 0
        for distance in range(1, self.cache_ahead + 1):
            i = self.frame + step * distance
            if not 0 <= i < self.N or loaded >= self.cache_growth_rate:
                break
            if self._load(i):
                loaded += 1
        # Prefetching may have evicted the current frame from the bounded
        # cache; make sure it is present again.
        self._load(self.frame)
        try:
            self.file = self.files[self.frame]
        except KeyError:
            raise KeyError(
                "File with number %i not in allowed range [0,%i]." %
                (self.frame, self.N - 1))
        # ``get`` reads from ``self.data`` in both modes.
        # NOTE(review): here the data is a structured memmap, so values may
        # carry an extra leading record axis compared to the uncached mode.
        self.data = self.file
        self._resetStats()

    def getFilename(self):
        """Return the file name of the current frame."""
        return self.filenames[self.frame]

    def getRange(self):
        """Return ``[first, last]`` valid frame numbers."""
        return [0, self.N - 1]

    def __len__(self):
        return self.N

    def __setitem__(self, key, val):
        raise Exception("Write by getting a reference to the object first!")

    def __contains__(self, k):
        return k in self.dtype

    def get(self, k, default=None):
        """Return the transposed data set ``k`` of the current frame.

        Args:
            k: Name of the data set.
            default: Returned instead of raising when ``k`` is unknown and
                ``default`` is not None.

        Raises:
            Exception: If ``k`` is unknown and no default was given; the
                available keys are printed first.
        """
        if k not in self.dtype:
            if default is not None:
                return default
            print("Available keys:")
            print(('\n'.join(self.keys())))
            raise Exception("The key '%s' could not be found." % k)
        if self.usecache:
            self.stats[k] += 1
        return self.data[k].T

    def __getitem__(self, k):
        return self.get(k)

    def keys(self):
        """Return the sorted names of all data sets found in the header."""
        return sorted(self.dtype)

    def getNumber(self):
        """Return the integer between the last '_' and the following '.'
        of the current file name."""
        tail = self.filenames[self.frame].rsplit('_', 1)[1]
        return int(tail.split('.', 1)[0])

    def _ReadHeader(self):
        """Parse the layout of the first file of the series.

        Reads the fixed-size preamble (magic, endianness, version, real and
        integer size) and then walks over all records, storing for each key
        a ``(dtype string, byte offset)`` pair in ``self.dtype``.
        """
        # Maps the type id of a record to a factory for its numpy dtype
        # string; ``d`` is the data length or the tuple of dimensions.
        self.dtypes = {
            1: lambda d: "%si%i" % (self.endian, self.intsize),
            2: lambda d: "%sf%i" % (self.endian, self.realsize),
            3: lambda d: "S%i" % d,
            4: lambda d: "%si%i" % (self.endian, self.intsize),
            5: lambda d: "%s%sf%i" % ((d,), self.endian, self.realsize),
            6: lambda d: "%s%sf%i" % (d, self.endian, self.realsize),
            7: lambda d: "%s%sf%i" % (d, self.endian, self.realsize),
            8: lambda d: "%s%sf%i" % (d, self.endian, self.realsize),
            9: lambda d: "%s%si%i" % ((d,), self.endian, self.intsize),
            10: lambda d: "%sf%i" % (self.endian, self.realsize),
            11: lambda d: "%si%i" % (self.endian, self.intsize),
            12: lambda d: "%s%sf%i" % (d, self.endian, self.realsize)}
        # Number of dimension integers stored in front of array records.
        ndims = {6: 3, 7: 3, 8: 4, 12: 5}

        self.dtype = dict()

        with open(self.filenames[0], 'rb') as handle:
            # ``access=`` works on every platform, unlike the Unix-only
            # flags/prot arguments.
            f = mmap.mmap(handle.fileno(), 0, access=mmap.ACCESS_READ)
            try:
                self.magic = f.read(6)
                self.endian = f.read(2)
                self.version = f.read(1)
                self.realsize = f.read(2)
                self.intsize = f.read(2)
                self.endian = '<' if self.endian == b'II' else '>'
                self.version = unpack('%sB' % self.endian, self.version)[0]
                self.intsize = int(self.intsize)
                self.realsize = int(self.realsize)

                keylen = f.read(self.intsize)
                # A short read marks the end of the file.
                while len(keylen) == 4:
                    keylen, = unpack('%si' % self.endian, keylen)
                    key, t, datalen = unpack(
                        "%s%isii" % (self.endian, keylen),
                        f.read(keylen + 2 * self.intsize))
                    key = key.decode("utf-8")
                    if datalen > 0:
                        dims = datalen
                        if t in ndims:
                            d = ndims[t]
                            # The dimensions are part of the payload.
                            datalen -= d * self.intsize
                            dims = tuple(unpack(
                                '%s%ii' % (self.endian, d),
                                f.read(d * self.intsize)))[::-1]
                        offset = f.tell()
                        self.dtype[key] = (
                            self.dtypes[t](dims), np.int64(offset))
                        f.seek(datalen, os.SEEK_CUR)
                    keylen = f.read(self.intsize)
            finally:
                # The mapping was previously leaked.
                f.close()

    def _openFile(self, i):
        """Map file ``i`` and return a dict of array views, one per key."""
        self.file = np.memmap(self.filenames[i], mode=self.mode)
        views = dict()
        for key, (fmt, offset) in self.dtype.items():
            # ``fmt`` may carry a shape prefix such as "(3, 2)<f8"; split it
            # into the array shape and the scalar element type.
            shape = np.dtype(fmt).shape
            scalar = np.dtype(fmt.split(')')[-1])
            views[key] = np.ndarray(
                shape=shape,
                buffer=self.file,
                dtype=scalar,
                offset=offset)
        return views
231
class cache(OrderedDict):
    """Bounded, insertion-ordered mapping that evaluates values in a pool.

    Assigning a callable submits it to a thread pool and stores the
    resulting future; reading a key waits for and returns the result of
    that call.  When the mapping is full, the oldest entries are dropped.
    """

    def __init__(self, maxitems, max_workers=None, *args, **kwargs):
        """Create the cache.

        Args:
            maxitems: Number of entries at which old ones are evicted.
            max_workers: Passed on to ``ThreadPoolExecutor``.
            *args, **kwargs: Initial items, as accepted by ``OrderedDict``;
                their values must be callables as well.
        """
        # The module-level import of ``futures`` is commented out, so the
        # name has to be brought into scope here.
        from concurrent import futures
        # Both attributes must exist before the base initialiser runs,
        # because it inserts initial items through ``__setitem__``.
        self.maxitems = maxitems
        self.pool = futures.ThreadPoolExecutor(max_workers=max_workers)
        super(cache, self).__init__(*args, **kwargs)

    def __setitem__(self, key, val):
        """Submit the callable ``val`` and store its future under ``key``."""
        # ``len(self) > 0`` keeps a non-positive ``maxitems`` from popping
        # from an empty mapping.
        while len(self) > 0 and len(self) >= self.maxitems:
            oldest = next(iter(self))
            # Fetch the raw future (not its result) so eviction never
            # blocks, and drop the work if it has not started yet.
            stale = super(cache, self).__getitem__(oldest)
            super(cache, self).__delitem__(oldest)
            stale.cancel()
        fut = self.pool.submit(val)
        super(cache, self).__setitem__(key, fut)

    def __getitem__(self, key):
        """Return the result of the callable stored under ``key``.

        Blocks until the call has finished.  Raises ``KeyError`` for an
        unknown key.
        """
        return super(cache, self).__getitem__(key).result()

    def __del__(self):
        # ``pool`` is missing if ``__init__`` failed early.
        pool = getattr(self, 'pool', None)
        if pool is not None:
            pool.shutdown(wait=False)
252
253
def memopen(fname, dtype, preload=None):
    """Open ``fname`` as a read-only ``numpy.memmap`` of the given dtype.

    Args:
        fname: Path of the file to map.
        dtype: Element type handed to ``numpy.memmap``.
        preload: Optional index (e.g. a list of field names).  If given,
            the mapping is indexed with it once and the result discarded;
            the intent is to get those pages cached.

    Returns:
        The memory-mapped array.
    """
    mapped = np.memmap(fname, dtype=dtype, mode='r')
    if preload is None:
        return mapped
    # Request the data once; the value itself is not needed.
    mapped[preload]
    return mapped
265
266
def memoize(obj):
    """Decorator that remembers the results of ``obj`` per argument set.

    Results are stored in a dict that is also exposed as ``obj.cache``.
    The lookup key is the string form of the positional arguments followed
    by the string form of the keyword arguments.
    """
    store = {}
    obj.cache = store

    @functools.wraps(obj)
    def memoizer(*args, **kwargs):
        signature = str(args) + str(kwargs)
        if signature in store:
            return store[signature]
        outcome = obj(*args, **kwargs)
        store[signature] = outcome
        return store[signature]
    return memoizer
277
278#d = read('dmr_0000.bin')
279# print d['/timedisc/density']
def __setitem__(self, key, val)
Definition: read.py:240
def __del__(self)
Definition: read.py:250
def __init__(self, maxitems, max_workers=None, *args, **kwargs)
Definition: read.py:234
def __getitem__(self, key)
Definition: read.py:246
maxitems
Definition: read.py:238
realsize
Definition: read.py:187
dtypes
Definition: read.py:160
index
Definition: read.py:65
def __iter__(self)
Definition: read.py:64
version
Definition: read.py:185
mode
Definition: read.py:37
cache_ahead
Definition: read.py:52
usecache
Definition: read.py:36
frame
Definition: read.py:58
def __getitem__(self, k)
Definition: read.py:139
def getRange(self)
Definition: read.py:115
def __contains__(self, k)
Definition: read.py:124
def __init__(self, filename, usecache=False, mode='r')
Definition: read.py:23
cache_growth_rate
Definition: read.py:53
def getFilename(self)
Definition: read.py:112
def _load(self, n)
Definition: read.py:76
filenames
Definition: read.py:25
def _openFile(self, i)
Definition: read.py:218
endian
Definition: read.py:181
def select(self, n)
Definition: read.py:88
def getNumber(self)
Definition: read.py:145
def __next__(self)
Definition: read.py:69
def _ReadHeader(self)
Definition: read.py:154
cache_size
Definition: read.py:54
intsize
Definition: read.py:186
stats
Definition: read.py:62
def __len__(self)
Definition: read.py:118
def __setitem__(self, key, val)
Definition: read.py:121
def keys(self)
Definition: read.py:142
def _resetStats(self)
Definition: read.py:61
def get(self, k, default=None)
Definition: read.py:127
files
Definition: read.py:56
type(dict_typ) function, pointer, public dict(n1, n2, n3, n4, n5, n6, n7, n8, n9, n10, n11, n12, n13, n14, n15, n16, n17, n18, n19, n20)
Construct a new dictionary from several key/value pairs. Together with the Assign subroutine and over...
def split(send, *args)
Definition: coplot.py:1206
def max(send, axis=None)
Definition: coplot.py:120
def min(send, axis=None)
Definition: coplot.py:114
def memopen(fname, dtype, preload=None)
Definition: read.py:254
def memoize(obj)
Definition: read.py:267
integer, parameter list