read.py
#!/usr/bin/env python
from struct import unpack, calcsize
import numpy as np
import os
import glob
import mmap
from collections import OrderedDict
# concurrent.futures is part of the standard library since Python 3.2;
# a backport exists in the PyPI package "futures" for Python 2
from concurrent import futures
import itertools
import functools



"""@package read
This package provides a wrapper for the binary and XDMF output.

Use the read class to load the data; a usage example is given at the
end of this module.
"""


class read(object):

    def __init__(self, filename, usecache=False, mode='r'):
        if isinstance(filename, (tuple, list)):
            self.filenames = list(filename)
            if len(self.filenames) == 0:
                raise IOError("No file given!")
        else:
            self.filenames = glob.glob(filename)
            if len(self.filenames) == 0:
                raise IOError("No file found, invalid pattern '%s'" % filename)
            self.filenames.sort()
        for file in self.filenames:
            if not os.path.isfile(file):
                raise IOError("Not a file/missing: '%s'" % file)
        self.usecache = usecache
        self.mode = mode

        # may be reduced if a file cannot be opened
        self.N = len(self.filenames)
        #self.filetemplate = self._getFiletemplate(filename)
        #self.filename = filename
        # if self.isTemplate:
        #     self.setRange()
        #     self.frame = self.range[0]
        # else:
        #     self.frame = -1
        #self.last = None

        self._ReadHeader()
        if self.usecache:
            self.cache_ahead = 16
            # assumed default; limits how many files get scheduled per select()
            self.cache_growth_rate = 4
            self.cache_size = self.cache_ahead + 2
            self._resetStats()
            # bounded cache of asynchronously opened files (see cache below)
            self.files = cache(self.cache_size)

        self.frame = 0
        self.select(self.frame)

    def _resetStats(self):
        self.stats = {key: 0 for key in self.keys()}

    def __iter__(self):
        self.index = 0
        self.select(self.index)
        return self

    def __next__(self):
        if self.index >= self.N:
            raise StopIteration
        self.select(self.index)
        self.index += 1
        return self

    def _load(self, n):
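        """Schedule the file with index n for loading into the cache.

        Returns True if the file was newly scheduled (or n is out of
        range), False if it is already in the cache.
        """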
        if not (0 <= n < self.N):
            return True
        if n not in self.files:
            preload = [k for k, v in self.stats.items() if v > 0]
            #self.files[n] = lambda: self._openFile(n,preload)
            self.files[n] = lambda: memopen(
                self.filenames[n], self.dtype, preload)
            return True
        else:
            return False

    def select(self, n):
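        """Make the n-th file the current frame.

        Negative n counts from the end of the file list. With caching
        enabled, up to cache_growth_rate additional files ahead of the
        new frame are scheduled for prefetching.
        """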
        oldframe = self.frame
        if n < 0:
            n += self.N
        self.frame = max(min(n, self.N - 1), 0)
        delta = self.frame - oldframe
        loaded = 0
        if self.usecache:
            for i in itertools.count(n, delta):
                if self._load(i):
                    loaded += 1
                if (loaded >= self.cache_growth_rate
                        or i >= oldframe + delta * self.cache_ahead):
                    break
            try:
                self.data = self.files[self.frame]
            except KeyError:
                raise KeyError(
                    "File with number %i not in allowed range [0,%i]." %
                    (self.frame, self.N - 1))
            self._resetStats()
        else:
            self.data = self._openFile(self.frame)

    def getFilename(self):
        return self.filenames[self.frame]

    def getRange(self):
        return [0, self.N - 1]

    def __len__(self):
        return self.N

    def __setitem__(self, key, val):
        raise Exception("Write by getting a reference to the object first!")

    def __contains__(self, k):
        return k in self.keys()

    def get(self, k, default=None):
        if self.usecache:
            self.stats[k] += 1
        try:
            return self.data[k].T
        except KeyError:
            if default is not None:
                return default
            print("Available keys:")
            print('\n'.join(self.keys()))
            raise Exception("The key '%s' could not be found." % k)

    def __getitem__(self, k):
        return self.get(k)

    def keys(self):
        return sorted(self.dtype.keys())

    def getNumber(self):
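        """Return the frame number encoded in the current filename.

        The number is taken from the digits between the last '_' and the
        file extension, e.g. 'dmr_0042.bin' -> 42.
        """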
        return int(
            self.filenames[self.frame].rsplit('_', 1)[1].split('.', 1)[0])

    def _ReadHeader(self):
        """Read the header of the binary file.

        The structure of the fixed-size binary data is described
        \link here \endlink.
        """
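        # map the file's type codes to numpy dtype strings; d carries the
        # size (strings) or shape (arrays) information of the entry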
        self.dtypes = {
            1: lambda d: "%si%i" % (self.endian, self.intsize),
            2: lambda d: "%sf%i" % (self.endian, self.realsize),
            3: lambda d: "S%i" % d,
            4: lambda d: "%si%i" % (self.endian, self.intsize),
            5: lambda d: "%s%sf%i" % ((d,), self.endian, self.realsize),
            6: lambda d: "%s%sf%i" % (d, self.endian, self.realsize),
            7: lambda d: "%s%sf%i" % (d, self.endian, self.realsize),
            8: lambda d: "%s%sf%i" % (d, self.endian, self.realsize),
            9: lambda d: "%s%si%i" % ((d,), self.endian, self.intsize),
            10: lambda d: "%sf%i" % (self.endian, self.realsize),
            11: lambda d: "%si%i" % (self.endian, self.intsize),
            12: lambda d: "%s%sf%i" % (d, self.endian, self.realsize)}

        self.dtype = dict()

        with open(self.filenames[0], 'rb') as m:
            f = mmap.mmap(m.fileno(), 0, mmap.MAP_SHARED, mmap.PROT_READ)
            self.magic, self.endian, self.version, self.realsize, self.intsize \
                = f.read(6), f.read(2), f.read(1), f.read(2), f.read(2)
            if self.endian == b'II':
                self.endian = '<'
            else:
                self.endian = '>'
            self.version = unpack('%sB' % self.endian, self.version)[0]
            self.intsize = int(self.intsize)
            self.realsize = int(self.realsize)

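            # Each header entry consists of: key length, key string, a type
            # code, and the payload length in bytes. For array types the
            # payload starts with the array dimensions; the data itself is
            # not read here, only its dtype and byte offset are recorded.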
            keylen = f.read(self.intsize)
            while len(keylen) == 4:
                keylen, = unpack('%si' % self.endian, keylen)
                key, t, datalen = unpack(
                    "%s%isii" % (self.endian, keylen),
                    f.read(keylen + 2 * self.intsize))
                key = key.decode("utf-8")
                if datalen > 0:
                    dims = datalen
                    if t in [6, 7, 8, 12]:
                        translate = {6: 3, 7: 3, 8: 4, 12: 5}
                        d = translate[t]
                        datalen -= d * self.intsize
                        dims = tuple(unpack(
                            '%s%ii' % (self.endian, d),
                            f.read(d * self.intsize)))[::-1]
                    offset = f.tell()
                    self.dtype[key] = (self.dtypes[t](dims), np.int64(offset))
                    f.seek(datalen, os.SEEK_CUR)
                keylen = f.read(self.intsize)

    def _openFile(self, i):
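        """Memory-map file number i and return a dict of numpy views.

        Every key recorded by _ReadHeader is mapped onto an ndarray view
        into the memmap at its stored byte offset.
        """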
        self.file = np.memmap(self.filenames[i], mode=self.mode)
        d = dict()
        for key, value in self.dtype.items():
            dtype, offset = value
            dt = np.dtype(dtype)
            dtscalar = np.dtype(dtype.split(')')[-1])
            d[key] = np.ndarray(
                shape=dt.shape,
                buffer=self.file,
                dtype=dtscalar,
                offset=offset)
        return d


class cache(OrderedDict):
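    """A bounded, ordered cache of asynchronously computed values.

    Storing a callable under a key submits it to a thread pool and keeps
    the resulting future; reading a key blocks until its result is ready.
    The oldest entries are evicted once maxitems is reached.
    """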

    def __init__(self, maxitems, max_workers=None, *args, **kwargs):
        super(cache, self).__init__(*args, **kwargs)
        #self.pool = futures.ProcessPoolExecutor(max_workers=max_workers)
        self.pool = futures.ThreadPoolExecutor(max_workers=max_workers)
        self.maxitems = maxitems

    def __setitem__(self, key, val):
        while len(self) >= self.maxitems:
            super(cache, self).popitem(last=False)
        fut = self.pool.submit(val)
        super(cache, self).__setitem__(key, fut)

    def __getitem__(self, key):
        # print("CacheSize: ", len(self))
        return super(cache, self).__getitem__(key).result()

    def __del__(self):
        self.pool.shutdown(wait=False)


def memopen(fname, dtype, preload=None):
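    """Open fname as a structured numpy memmap.

    If preload is given, the listed fields are accessed once so that
    their pages are (ideally) read into the OS page cache.
    """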
    f = np.memmap(fname, dtype=dtype, mode='r')
    #@memoize
    # def get(key):
    #     return f[key]
    if preload is not None:
        # access the fields once, so their pages get cached
        # for k in preload: get(k)
        f[preload]
    # return get
    return f


def memoize(obj):
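    """Cache obj's results, keyed on the string form of its arguments."""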
    cache = obj.cache = {}

    @functools.wraps(obj)
    def memoizer(*args, **kwargs):
        key = str(args) + str(kwargs)
        if key not in cache:
            cache[key] = obj(*args, **kwargs)
        return cache[key]
    return memoizer


# d = read('dmr_0000.bin')
# print(d['/timedisc/density'])
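
# Usage sketch (assumes a series of snapshot files matching 'dmr_*.bin'
# and that '/timedisc/density' is one of the stored keys):
#
#   d = read('dmr_*.bin')         # pass usecache=True to prefetch in threads
#   print(d.keys())               # list all available fields
#   d.select(-1)                  # jump to the last snapshot
#   rho = d['/timedisc/density']  # numpy view of the field in that snapshot
#   for frame in d:               # iterate over all snapshots in order
#       print(frame.getNumber(), frame['/timedisc/density'].mean())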