forked from gleber/python-pxlib
-
Notifications
You must be signed in to change notification settings - Fork 1
/
pxpy.pyx
664 lines (531 loc) · 18.7 KB
/
pxpy.pyx
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
# -*- coding: utf-8 -*-
"""
Python wrapper around pxlib.
This module, written in Cython_, allows reading data from Paradox tables
using the pxlib_ library.
.. _cython: http://cython.org/
.. _pxlib: http://pxlib.sourceforge.net/
"""
import datetime
import sys
import atexit
from cpython.version cimport PY_MAJOR_VERSION
from paradox cimport *
# Encoding used to encode file names passed to pxlib and as the initial
# input/target encoding of a Table.
cdef str DEFAULT_ENCODING="UTF-8"
cdef void errorhandler(pxdoc_t *p, int type, const_char_ptr msg, void *data):
    """
    Error callback handed to `PX_new2`: print the error type and message.

    The `p` and `data` arguments are part of the callback signature but are
    not used here.
    """
    report = "ParadoxError: {} - {}".format(type, msg)
    print(report)
# Forward declarations: both classes are referenced by the classes below
# before their own definitions appear further down in this file.
cdef class Record
cdef class PXDoc
cdef class Table:
    """
    A `Table` represents a Paradox table, with primary index and blob file.

    The instance can be used as a context manager: entering calls `open()`
    and leaving calls `close()`.  Encodings chosen with `setTargetEncoding`
    and `setInputEncoding` are applied to the underlying document by
    `open()`.
    """

    cdef PXDoc doc
    cdef RecordIterator defaultIterator
    cdef str target_encoding
    cdef str input_encoding
    cdef PrimaryIndex primary_index
    cdef bytes blob_file

    def __cinit__(self, filename, index_file=None, blob_file=None):
        self.target_encoding = DEFAULT_ENCODING
        self.input_encoding = DEFAULT_ENCODING
        self.doc = PXDoc(filename)
        self.primary_index = PrimaryIndex(index_file)\
            if index_file is not None else None
        self.blob_file = blob_file.encode(DEFAULT_ENCODING)\
            if blob_file is not None else None

    def open(self):
        """
        Open the data file and associate a Record instance.
        """
        self.doc.open()
        self.doc.targetEncoding = self.target_encoding
        self.doc.inputEncoding = self.input_encoding
        if self.primary_index is not None:
            self.primary_index.open()
            self.doc.setPrimaryIndex(self.primary_index)
        if self.blob_file is not None:
            self.doc.setBlobFile(self.blob_file)
        # The default iterator can only be built once the file is open,
        # because it sizes itself from the document's record count.
        self.defaultIterator = RecordIterator(self.doc)

    def __enter__(self):
        self.open()
        return self

    def close(self):
        """
        Close the eventual primary index or blob file, then the data file.
        """
        if self.primary_index:
            self.primary_index.close()
        self.doc.close()

    def __exit__(self, type, value, traceback):
        self.close()

    def getTableName(self):
        return self.doc.getTableName()

    def getName(self):
        return self.doc.getName()

    def getTargetEncoding(self):
        return self.doc.targetEncoding

    def setTargetEncoding(self, encoding):
        # Only stored here; it is pushed to the document by open().
        self.target_encoding = encoding

    def getInputEncoding(self):
        return self.doc.inputEncoding

    def setInputEncoding(self, encoding):
        # Only stored here; it is pushed to the document by open().
        self.input_encoding = encoding

    def getCodePage(self):
        return self.doc.getCodePage()

    def hasBlobFile(self):
        return self.doc.hasBlobFile()

    def getBlobName(self):
        return self.doc.getBlobName()

    def getRecordCount(self):
        return len(self.doc)

    def getFieldCount(self):
        return self.defaultIterator.getFieldCount()

    def getFieldNames(self):
        return self.defaultIterator.getFieldNames()

    def __iter__(self):
        return self.defaultIterator

    def __getitem__(self, key):
        """
        Return a `RecordIterator` for a slice, or a `Record` for an integer.

        Raises `TypeError` for any other kind of key instead of silently
        returning None.
        """
        if isinstance(key, slice):
            # Missing slice parts fall back to 0/0/1; a limit of 0 is
            # interpreted by RecordIterator as "up to the last record".
            start = key.start if key.start else 0
            stop = key.stop if key.stop else 0
            step = key.step if key.step else 1
            return RecordIterator(self.doc, start, stop, step)
        elif isinstance(key, int):
            return self.defaultIterator[key]
        raise TypeError(
            "Table indices must be integers or slices, not {}".format(
                type(key).__name__))

    def __len__(self):
        return len(self.doc)
cdef class RecordIterator:
    """
    An instance has notion about the current record number, offset and limit

    Iteration reuses a single `Record` instance: every call to `__next__`
    re-reads that record's buffer and returns the same object.  Indexing
    with `[]` returns a freshly created `Record` instead.
    """

    cdef readonly PXDoc doc
    cdef readonly Record record
    cdef int limit
    cdef int offset
    cdef int step
    cdef int current_recno

    def __cinit__(self, PXDoc doc, int offset=0, int limit=0, int step=1):
        self.doc = doc
        self.record = Record(doc)
        cdef int doc_length = len(doc)
        # Normalise the limit: 0 means "to the end", values past the end are
        # clamped, negative values count back from the end.
        if limit == 0:
            limit = doc_length
        elif limit > doc_length:
            limit = doc_length
        elif limit < 0:
            limit = doc_length + limit
        self.limit = limit
        # Normalise the offset the same way (negative counts from the end).
        if offset < 0:
            offset = doc_length + offset
        elif offset > doc_length:
            offset = doc_length
        self.offset = offset
        self.step = step
        self.current_recno = self.offset

    def next(self):
        return self.__next__()

    def __next__(self):
        recno = self.current_recno
        ok = True
        if recno >= self.limit:
            ok = False
        else:
            ok = self.record.read(recno)
            self.current_recno = recno + self.step
        if not ok:
            # Rewind so the same iterator object can be iterated again.
            self.current_recno = self.offset
            raise StopIteration()
        return self.record

    def __iter__(self):
        return self

    def __len__(self):
        """
        Return the number of records a full iteration yields.

        An empty or inverted range gives 0 (a negative value would make
        `len()` raise ValueError), and a step greater than one is taken
        into account.
        """
        cdef int span = self.limit - self.offset
        if span <= 0:
            return 0
        if self.step > 1:
            # Ceiling division: records at offset, offset+step, ... < limit.
            return (span + self.step - 1) // self.step
        return span

    def getFieldCount(self):
        return len(self.record)

    def getFieldNames(self):
        return self.record.getFieldNames()

    def __getitem__(self, recno):
        """
        Return a new `Record` holding the data of record number `recno`.

        Negative numbers count back from the limit; anything outside
        `0 <= recno < limit` raises `IndexError`.
        """
        if recno < 0:
            recno = self.limit + recno
        if recno >= self.limit or recno < 0:
            raise IndexError()
        record = Record(self.doc)
        record.read(recno)
        return record
cdef class PXDoc:
    """
    Basic wrapper to 'pxdoc_t' based objects.
    """

    cdef pxdoc_t *px_doc
    cdef bytes filename
    cdef char isopen

    def __cinit__(self, filename):
        """
        Create a PXDoc instance, associated to the given external filename.

        Raises `MemoryError` when pxlib cannot allocate the document handle.
        """
        self.isopen = 0
        self.filename = filename.encode(DEFAULT_ENCODING)
        self.px_doc = PX_new2(&errorhandler, NULL, NULL, NULL)
        if self.px_doc == NULL:
            raise MemoryError("Couldn't allocate a pxlib document")

    def __len__(self):
        """
        Return the number of records stored in the table header.

        Returns 0 when no header is available, rather than dereferencing a
        NULL pointer.
        """
        # NOTE(review): presumably pxlib leaves px_head NULL until the file
        # has been opened -- confirm against the pxlib sources.
        if self.px_doc == NULL or self.px_doc.px_head == NULL:
            return 0
        return self.px_doc.px_head.px_numrecords

    def open(self):
        """
        Open the data file.
        """
        if PX_open_file(self.px_doc, self.filename) < 0:
            raise Exception("Couldn't open `%s`" % self.filename)
        self.isopen = 1

    def close(self):
        """
        Close the data file if needed.
        """
        if self.isopen:
            PX_close(self.px_doc)
            self.isopen = 0

    def getCodePage(self):
        """
        Return the code page of the underlying Paradox table.

        The result is the byte string "cp" followed by the DOS code page
        number found in the table header.
        """
        return str("cp" + str(self.px_doc.px_head.px_doscodepage)).encode()

    property targetEncoding:
        def __get__(self):
            if self.px_doc.targetencoding:
                return self.px_doc.targetencoding.decode()
            return None

        def __set__(self, encoding):
            # pxlib expects a byte string: encode text values first.
            if (PY_MAJOR_VERSION >= 3 and isinstance(encoding, str))\
                    or isinstance(encoding, unicode):
                encoding = encoding.encode()
            PX_set_targetencoding(self.px_doc, encoding)

    property inputEncoding:
        def __get__(self):
            if self.px_doc.inputencoding:
                return self.px_doc.inputencoding.decode()
            return None

        def __set__(self, encoding):
            # pxlib expects a byte string: encode text values first.
            if (PY_MAJOR_VERSION >= 3 and isinstance(encoding, str))\
                    or isinstance(encoding, unicode):
                encoding = encoding.encode()
            PX_set_inputencoding(self.px_doc, encoding)

    def setValue(self, parameter, value):
        PX_set_value(self.px_doc, parameter, <float>value)

    def setParameter(self, parameter, value):
        PX_set_parameter(self.px_doc, parameter, value)

    def getTableName(self):
        return self.px_doc.px_head.px_tablename.decode(self.targetEncoding)

    def getName(self):
        return self.px_doc.px_name.decode(self.targetEncoding)

    def getBlobName(self):
        return self.px_doc.px_blob.mb_name if self.hasBlobFile() else None

    cdef setBlobFile(self, bytes filename):
        """
        Set and open the external blob file.
        """
        PX_set_blob_file(self.px_doc, filename)

    cdef setPrimaryIndex(self, PrimaryIndex index):
        """
        Set the primary index of the table.
        """
        if PX_add_primary_index(self.px_doc, index.px_doc) < 0:
            raise Exception("Couldn't add primary index `%s`" % index.filename)

    cdef bint hasBlobFile(self):
        return PX_has_blob_file(self.px_doc)

    def __dealloc__(self):
        """
        Close the data file
        """
        # __cinit__ may have failed before the handle was created (e.g. the
        # filename could not be encoded); never hand NULL to pxlib.
        if self.px_doc != NULL:
            if self.isopen:
                PX_close(self.px_doc)
                self.isopen = 0
            PX_delete(self.px_doc)
            self.px_doc = NULL
cdef class PrimaryIndex:
    """
    The primary index file.
    """

    cdef pxdoc_t *px_doc
    cdef bytes filename
    cdef char isopen

    def __cinit__(self, filename):
        """
        Create a PrimaryIndex instance, associated to the given external
        filename.

        Raises `MemoryError` when pxlib cannot allocate the document handle.
        """
        self.isopen = 0
        self.filename = filename.encode(DEFAULT_ENCODING)
        self.px_doc = PX_new2(&errorhandler, NULL, NULL, NULL)
        if self.px_doc == NULL:
            raise MemoryError("Couldn't allocate a pxlib document")

    def open(self):
        """
        Open the index file and read the primary index from it.

        Raises `Exception` when the file cannot be opened or the index
        cannot be read; in the latter case the file is closed again first.
        """
        if PX_open_file(self.px_doc, self.filename) < 0:
            raise Exception("Couldn't open `%s`" % self.filename)
        if PX_read_primary_index(self.px_doc) < 0:
            # The file was opened successfully: close it again, otherwise it
            # would stay open because `isopen` is never set on this path.
            PX_close(self.px_doc)
            raise Exception("Couldn't read primary index `%s`" % self.filename)
        self.isopen = 1

    def close(self):
        """
        Close the data file if needed.
        """
        if self.isopen:
            PX_close(self.px_doc)
            self.isopen = 0

    def __dealloc__(self):
        """
        Close the data file
        """
        # __cinit__ may have failed before the handle was created; never
        # hand NULL to pxlib.
        if self.px_doc != NULL:
            if self.isopen:
                PX_close(self.px_doc)
                self.isopen = 0
            PX_delete(self.px_doc)
            self.px_doc = NULL
cdef class ParadoxField:
    """
    Base description of a field: its name, its type code and its length.
    """

    cdef readonly fname
    cdef readonly ftype
    cdef readonly flen

    def __cinit__(self, *args):
        # Accepts and ignores any positional arguments; the subclasses in
        # this file declare `__init__` signatures that differ from this one.
        pass

    def __init__(self, fname, int ftype, int flen):
        self._init_fields(fname, ftype, flen)

    def _init_fields(self, fname, int ftype, int flen):
        """
        Store the three descriptive attributes on the instance.
        """
        self.fname, self.ftype, self.flen = fname, ftype, flen
def default_field_length(int ftype, len = 10):
    """
    Return the length to use for a field of type `ftype`.

    Long fields give 4, number fields give 8, alpha fields give the `len`
    argument and every other type gives 0.
    """
    if ftype == pxfLong:
        return 4
    if ftype == pxfAlpha:
        return len
    if ftype == pxfNumber:
        return 8
    return 0
def type_to_field_type(type ftype):
    """
    Map a Python type (`int`, `str` or `float`) to its Paradox field type.

    Raises `Exception` for any other type.
    """
    known_types = (
        (int, pxfLong),
        (str, pxfAlpha),
        (float, pxfNumber),
    )
    for python_type, paradox_type in known_types:
        if ftype == python_type:
            return paradox_type
    raise Exception("unsupported field type %s" % ftype)
cdef class Field(ParadoxField):
    """
    A field described by a Python type instead of a Paradox type code.
    """

    def __init__(self, fname, type t, int flen = 0):
        """
        Create the field `fname` for the Python type `t`.

        `flen` is only meaningful for string fields.  When it is left at 0
        (or is negative) the default of `default_field_length` applies,
        instead of forwarding a zero length.
        """
        ft = type_to_field_type(t)
        if flen > 0:
            fl = default_field_length(ft, flen)
        else:
            # Do not forward the "unset" value: it would override the
            # default string length and produce a zero-length alpha field.
            fl = default_field_length(ft)
        ParadoxField.__init__(self, fname, ft, fl)
cdef class RecordField(ParadoxField):
    """
    Represent a single field of a Record associated with some Table.
    """

    cdef void *data
    cdef Record record

    def __init__(self, Record record, int index, int offset):
        """
        Create a new instance, associated with the given `record`,
        pointing to the index-th field, which data is displaced by
        `offset` from the start of the record memory buffer.
        """
        self.record = record
        self.data = record.data+offset
        ParadoxField.__init__(
            self,
            record.doc.px_doc.px_head.px_fields[index].px_fname,
            record.doc.px_doc.px_head.px_fields[index].px_ftype,
            record.doc.px_doc.px_head.px_fields[index].px_flen
        )

    def __str__(self):
        return "{}: {}".format(self.getName(), self.getValue())

    def getName(self):
        return self.fname.decode(self.record.doc.targetEncoding)

    def getType(self):
        return self.ftype

    type = property(getType)

    def getValue(self):
        """
        Get the field's value.

        Return some Python value representing the current value of the field:
        text for alpha and memo/blob fields, `datetime.date` for dates,
        `datetime.time` for times, numbers for short/long/autoinc/currency/
        number fields, a boolean for logical fields and bytes for graphics.
        None is returned when pxlib reports no value, and for the field
        types that are not handled (OLE, timestamp, BCD, bytes).

        Raises `Exception` when pxlib fails to extract the value.
        """
        cdef double value_double
        cdef long value_long
        cdef char value_char
        cdef short value_short
        cdef int year, month, day
        cdef char *blobdata
        cdef int size
        cdef int mod_nr

        if self.ftype == pxfAlpha:
            codepage = self.record.doc.getCodePage()
            # The text may be shorter than the field: stop at the first NUL.
            size = strnlen(<char*> self.data, self.flen)
            if size == 0:
                return None
            else:
                py_string = PyUnicode_Decode(<char*> self.data, size, codepage, b"replace")
                if not py_string:
                    raise Exception("Cannot get value from string %s" % self.fname)
                return py_string
        elif self.ftype == pxfDate:
            if PX_get_data_long(self.record.doc.px_doc,
                                self.data, self.flen, &value_long) < 0:
                raise Exception("Cannot extract long field '%s'" % self.fname)
            if value_long:
                # Shift the stored day count by 1721425 before converting it
                # to a Gregorian year/month/day.
                PX_SdnToGregorian(value_long + 1721425,
                                  &year, &month, &day)
                return datetime.date(year, month, day)
            else:
                return None
        elif self.ftype == pxfShort:
            ret = PX_get_data_short(self.record.doc.px_doc,
                                    self.data, self.flen, &value_short)
            if ret < 0:
                raise Exception("Cannot extract short field '%s'" % self.fname)
            if ret == 0:
                return None
            return value_short
        elif self.ftype == pxfLong or self.ftype == pxfAutoInc:
            ret = PX_get_data_long(self.record.doc.px_doc,
                                   self.data, self.flen, &value_long)
            if ret < 0:
                raise Exception("Cannot extract long field '%s'" % self.fname)
            if ret == 0:
                return None
            return value_long
        elif self.ftype == pxfCurrency or self.ftype == pxfNumber:
            ret = PX_get_data_double(self.record.doc.px_doc,
                                     self.data, self.flen, &value_double)
            if ret < 0:
                raise Exception("Cannot extract double field '%s'" % self.fname)
            if ret == 0:
                return None
            return value_double
        elif self.ftype == pxfLogical:
            ret = PX_get_data_byte(self.record.doc.px_doc,
                                   self.data, self.flen, &value_char)
            if ret < 0:
                raise Exception("Cannot extract boolean field '%s'" % self.fname)
            if ret == 0:
                return None
            if value_char:
                return True
            else:
                return False
        elif self.ftype in [pxfMemoBLOb, pxfFmtMemoBLOb, pxfBLOb]:
            if not self.record.doc.hasBlobFile():
                return "[MISSING BLOB FILE]"
            ret = PX_get_data_blob(self.record.doc.px_doc, self.data, self.flen,
                                   &mod_nr, &size, &blobdata)
            if ret < 0:
                raise Exception("Cannot extract blob field '%s'" % self.fname)
            if ret == 0:
                return None
            if blobdata and size > 0:
                codepage = self.record.doc.getCodePage()
                py_string = PyUnicode_Decode(<char*> blobdata, size, codepage, b"replace")
                # The blob buffer was handed out by pxlib: release it with
                # the document's own free function once decoded.
                self.record.doc.px_doc.free(self.record.doc.px_doc, blobdata)
                if not py_string:
                    raise Exception("Cannot get value from string %s" % self.fname)
                return py_string
        elif self.ftype == pxfGraphic:
            if not self.record.doc.hasBlobFile():
                return "[MISSING BLOB FILE]"
            ret = PX_get_data_graphic(self.record.doc.px_doc, self.data, self.flen,
                                      &mod_nr, &size, &blobdata)
            if ret < 0:
                raise Exception("Cannot extract graphic field '%s'" % self.fname)
            if ret == 0:
                return None
            if blobdata and size > 0:
                py_bytes = PyBytes_FromStringAndSize(blobdata, size)
                self.record.doc.px_doc.free(self.record.doc.px_doc, blobdata)
                return py_bytes
        elif self.ftype == pxfOLE:
            pass
        elif self.ftype == pxfTime:
            ret = PX_get_data_long(self.record.doc.px_doc,
                                   self.data, self.flen, &value_long)
            if ret < 0:
                raise Exception("Cannot extract long field '%s'" % self.fname)
            # Use the return code, like the other numeric branches, so that
            # a stored value of 0 (midnight) is not mistaken for "no value".
            if ret == 0:
                return None
            # The value is a count of milliseconds.  datetime.time() only
            # accepts integers, so split it with integer arithmetic and pass
            # the sub-second part as microseconds.
            return datetime.time(
                value_long // 3600000,
                value_long // 60000 % 60,
                value_long // 1000 % 60,
                value_long % 1000 * 1000
            )
        elif self.ftype == pxfTimestamp:
            pass
        elif self.ftype == pxfBCD:
            pass
        elif self.ftype == pxfBytes:
            pass
        elif self.ftype == pxfNumTypes:
            pass
cdef class Record:
    """
    An instance of this class wraps the memory buffer associated with a
    single record of a given PXDoc.

    Iterating a record yields its `RecordField` instances; indexing accepts
    either a field position or a field name.
    """

    cdef void *data
    cdef int current_fieldno
    cdef PXDoc doc
    cdef public fields

    def __cinit__(self, PXDoc doc not None):
        """
        Create a Record instance, allocating the memory buffer and
        building the list of the Field instances.

        Raises `MemoryError` when the record buffer cannot be allocated.
        """
        cdef int offset
        self.current_fieldno = -1
        self.doc = doc
        self.fields = []
        self.data = doc.px_doc.malloc(
            doc.px_doc,
            doc.px_doc.px_head.px_recordsize,
            "Memory for record"
        )
        if self.data == NULL:
            raise MemoryError("Couldn't allocate memory for record")
        # Fields are laid out back to back in the record buffer: each one
        # starts where the previous one ends.
        offset = 0
        for i in range(len(self)):
            field = RecordField(self, i, offset)
            self.fields.append(field)
            offset = offset + doc.px_doc.px_head.px_fields[i].px_flen

    def __dealloc__(self):
        # Compare against None explicitly: the truth value of a PXDoc is its
        # record count (it defines __len__), so a plain `if self.doc:` would
        # skip the free for empty tables and touch the table header here.
        if self.doc is not None and self.data != NULL:
            self.doc.px_doc.free(self.doc.px_doc, self.data)
            self.data = NULL

    def getFieldNames(self):
        return [f.getName() for f in self.fields]

    def __len__(self):
        """
        Get number of fields in the record.
        """
        return self.doc.px_doc.px_head.px_numfields

    def read(self, recno):
        """
        Read the data associated to the record numbered `recno`.

        Returns True on success and raises `Exception` otherwise.
        """
        if PX_get_record(self.doc.px_doc, recno, self.data) == NULL:
            raise Exception("Couldn't get record {} from '{}'".format(
                recno, self.doc.filename))
        return True

    def __str__(self):
        return "{0}".format([(f.getName(), f.getValue()) for f in self.fields])

    def __iter__(self):
        return self

    def next(self):
        return self.__next__()

    def __next__(self):
        fieldno = self.current_fieldno + 1
        try:
            field = self.fields[fieldno]
            self.current_fieldno = fieldno
            return field
        except IndexError:
            # Rewind so the record can be iterated again.
            self.current_fieldno = -1
            raise StopIteration()

    def __getitem__(self, key):
        """
        Return the field named `key` (for a string) or at position `key`.

        Raises `KeyError` when no field carries the given name.
        """
        if isinstance(key, str):
            for field in self.fields:
                if field.getName() == key:
                    return field
            raise KeyError("'" + key + "'")
        else:
            return self.fields[key]
# Sets up locale for pxlib
PX_boot()


# Shut down pxlib when the interpreter exits
@atexit.register
def __dealloc__():
    PX_shutdown()