-
Notifications
You must be signed in to change notification settings - Fork 12
/
_data_old.py
3433 lines (2618 loc) · 120 KB
/
_data_old.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
import os as _os
import shutil as _shutil
# do this so all the scripts will work with all the numpy functions
import numpy as _n
import scipy.optimize as _opt
import scipy.special as _special
import scipy.odr as _odr
import pylab as _p
import textwrap as _textwrap
import spinmob as _s
import time as _time
# Things that belong here too
try: from . import _functions
except: _functions = _s.fun
averager = _functions.averager
#############################################################
# Class for storing / manipulating / saving / loading data
#############################################################
class databox:
    """
    An object to hold, save, and load columns of data and header information.

    Parameters
    ----------
    delimiter
        The delimiter the file uses. None (default) means "Try to figure it out" (reasonably smart)
    debug
        Displays some partial debug information while running

    Additional optional keyword arguments are sent to self.h()
    """

    # Used by load_file() to rename annoying column names that aren't
    # consistent between different types of data files (and older data
    # files), or to just rename columns with difficult-to-remember ckeys.
    obnoxious_ckeys = {}
    #obnoxious_ckeys = {"example_annoying1" : "unified_name1",
    #                   "example_annoying2" : "unified_name2"}

    # spinmob.settings key used for the file dialogs' default directory.
    directory = "default_directory"

    # Path of the most recently loaded / saved file.
    path = "(no path)"

    # Use this to print debug info in various places
    debug = False

    # Delimiter of the ascii file. If "None" this will just use any whitespace
    delimiter = None

    # NOTE(review): these are mutable class-level containers; __init__ calls
    # the clear_* methods to rebind per-instance copies so instances don't
    # share state -- confirm all construction paths go through __init__.
    headers = {}         # this dictionary will hold the header information
    columns = {}         # this dictionary will hold the data columns
    ckeys = []           # special list of column keys to keep track of their order during data assembly
    hkeys = []           # ordered list of header keys
    extra_globals = {}   # extra globals made visible to script evaluation

    # Flag for type checking on inherited objects (without need to import library)
    _is_spinmob_databox = True
def __init__(self, delimiter=None, debug=False, **kwargs):
    """
    Create an empty databox.

    Parameters
    ----------
    delimiter=None
        Column delimiter for file I/O; None means "any whitespace".
    debug=False
        Print extra debug information while running.

    Additional keyword arguments are accepted per the class docstring.
    """
    # Rebind fresh per-instance containers so instances don't share the
    # mutable class-level dictionaries and lists.
    self.clear_columns()
    self.clear_headers()
    self.clear_averagers()

    # Remember the configuration for later use.
    self.debug     = debug
    self.delimiter = delimiter
def __setitem__(self, n, x):
"""
set's the n'th column to x (n can be a column name too)
"""
if type(n) is str:
self.insert_column(data_array=x, ckey=n, index=None)
elif type(n) in [int, int] and n > len(self.ckeys)-1:
self.insert_column(data_array=x, ckey='_column'+str(len(self.ckeys)), index=None)
else:
self.columns[self.ckeys[n]] = _n.array(x)
def __len__(self):
return len(self.ckeys)
def _repr_tail(self):
    """
    Returns the tail end of the __repr__ string with databox info
    (needed for inherited classes).
    """
    # Scan the columns, tracking the longest one and whether every
    # column has the same length.
    rows    = None
    matched = True
    for index in range(len(self)):
        column = self[index]

        # First column establishes the baseline row count.
        if rows is None:
            rows = len(column)

        # Any disagreement means the columns aren't matched; keep the max.
        elif len(column) != rows:
            matched = False
            rows    = max(rows, len(column))

    # No columns at all => zero rows.
    if rows is None: rows = 0

    # Assemble the summary text.
    tail = str(len(self.hkeys)) + " headers, " + str(len(self.ckeys)) + " columns, " + str(rows)
    return tail + (' rows>' if matched else ' rows (max)>')
def __repr__(self):
    """One-line summary: header count, column count, and row count."""
    return "<databox instance: " + self._repr_tail()
def __eq__(self, other):
    """Two databoxes compare equal when is_same_as() says they are."""
    return self.is_same_as(other)
def more_info(self):
    """
    Prints out more information about the databox: path, all header
    entries, and the column names.
    """
    # Path and full header dump.
    print("\nDatabox Instance", self.path)
    print("\nHeader")
    for hkey in self.hkeys:
        print(" " + hkey + ":", self.h(hkey))

    # Column names on one comma-separated line (trailing ", " stripped).
    line = "\nColumns (" + str(len(self.ckeys)) + "): "
    for ckey in self.ckeys:
        line = line + ckey + ", "
    print(line[:-2])
def _globals(self):
"""
Returns the globals needed for eval() statements.
"""
# start with numpy
globbies = dict(_n.__dict__)
globbies.update(_special.__dict__)
# update with required stuff
globbies.update({'h':self.h, 'c':self.c, 'd':self, 'self':self})
# update with user stuff
globbies.update(self.extra_globals)
return globbies
def load_file(self, path=None, first_data_line='auto', filters='*.*', text='Select a file, FACEPANTS.', default_directory=None, header_only=False, quiet=False):
    """
    This will clear the databox, load a file, storing the header info in
    self.headers, and the data in self.columns.

    If first_data_line="auto", then the first data line is assumed to be
    the first line where all the elements are numbers.

    If you specify a first_data_line (index, starting at 0), the columns
    need not be numbers. Everything above will be considered header
    information and below will be data columns.

    In both cases, the line used to label the columns will always be the
    last header line with the same (or more) number of elements as the
    first data line.

    Parameters
    ----------
    path=None
        Path to the file. Using None will bring up a dialog.
    first_data_line='auto'
        Index of the first data line, or 'auto' to detect the first line
        whose elements are all numbers.
    filters='*.*'
        Filter for the file dialog (if path isn't specified)
    text='Select a file, FACEPANTS.'
        Prompt on file dialog
    default_directory=None
        Which spinmob.settings key to use for the dialog's default
        directory. Will create one if it doesn't already exist.
    header_only=False
        Only load the header
    quiet=False
        Don't print anything while loading.

    Returns
    -------
    self on success; None if the dialog was cancelled or the path does
    not exist.
    """
    # Set the default directory
    # NOTE(review): default_directory is computed here but the dialog call
    # below passes self.directory instead -- confirm which is intended.
    if default_directory is None: default_directory = self.directory

    # Ask user for a file to open
    if path is None:
        path = _s.dialogs.load(filters=filters,
                               default_directory=self.directory,
                               text=text)
    self.path = path

    # Dialog cancelled; nothing to load.
    if path is None:
        return None

    # make sure the file exists!
    if not _os.path.exists(path):
        if not quiet: print("ERROR: "+repr(path)+" does not exist.")
        return None

    # clear all the existing data
    self.clear()

    # First check if the file is SPINMOB_BINARY format
    # (first 14 bytes are the magic string).
    f = open(path, 'rb')

    # If this file is in SPINMOB_BINARY mode!
    if f.read(14).decode('utf-8') == 'SPINMOB_BINARY':

        # Next character is the delimiter
        self.delimiter = f.read(1).decode('utf-8')

        # Find the newline, reading one character at a time.
        s = ' '
        while not s[-1] == '\n': s = s+f.read(1).decode('utf-8')

        # Rest of the line is the binary dtype, e.g. 'float32'.
        self.h(SPINMOB_BINARY = s.strip())

        # Now manually assemble the header lines to use in the analysis
        # below. If I try readline() on the binary file, it will crash.
        lines = ['\n']

        # The end of the header is specified by 'SPINMOB_BINARY' on its own line.
        while not lines[-1] == 'SPINMOB_BINARY':

            # Get the next line, one character at a time.
            s = ' '
            while not s[-1] == '\n': s = s+f.read(1).decode('utf-8')

            # Okay we have it
            lines.append(s.strip())

        # Pop that last line, which should be 'SPINMOB_BINARY'.
        # We've reached the end of the header.
        lines.pop(-1)

    # Close the binary read.
    f.close()

    # If we're not in binary mode, we can read all the lines and find
    # the delimiter as usual. (In binary mode, the delimiter is specified)
    if not 'SPINMOB_BINARY' in self.hkeys:

        # For non-binary files, we always read all the lines.
        f = open(path, 'r')
        lines = f.readlines()
        f.close()

        # Determine the delimiter
        if self.delimiter is None:

            # loop from the end of the file until we get something other than white space
            for n in range(len(lines)):

                # strip away the white space
                s = lines[-n-1].strip()

                # if this line has any content
                if len(s) > 0:

                    # try the different delimiter schemes until we find one
                    # that produces a number. Otherwise it's ambiguous.
                    if   _s.fun.is_a_number(s.split(None)[0]): self.delimiter = None
                    elif _s.fun.is_a_number(s.split(',') [0]): self.delimiter = ','
                    elif _s.fun.is_a_number(s.split(';') [0]): self.delimiter = ';'

                    # quit the loop!
                    break

    # Done reading lines and auto-determining delimiter.

    ##### Parse the header from lines
    self.header_lines = []

    for n in range(len(lines)):

        # split the line by the delimiter
        s = lines[n].strip().split(self.delimiter)

        # remove a trailing whitespace entry if it exists.
        if len(s) and s[-1].strip() == '': s.pop(-1)

        # first check and see if this is a data line (all elements are numbers)
        if first_data_line == "auto" and _s.fun.elements_are_numbers(s):

            # we've reached the first data line
            first_data_line = n

            # quit the header loop
            break;

        ### after that check, we know it's a header line

        # save the lines for the avid user.
        self.header_lines.append(lines[n].strip())

        # store the hkey and the rest of it
        if len(s):
            hkey = s[0]
            if self.delimiter is None: remainder = ' '.join(s[1:])
            else:                      remainder = self.delimiter.join(s[1:])

            # first thing to try is simply evaluating the remaining string
            # NOTE(review): eval() of file content is unsafe on untrusted
            # files; kept here for backward compatibility.
            try:    self.insert_header(hkey, eval(remainder, self._globals()))

            # otherwise store the string
            except: self.insert_header(hkey, remainder)

    # now we have a valid set of column ckeys one way or another, and we know first_data_line.
    if header_only: return self

    # Deal with the binary mode
    if 'SPINMOB_BINARY' in self.hkeys:

        # Read the whole binary file.
        f = open(path, 'rb')
        s = f.read()
        f.close()

        # Get the delimiter for easier coding
        delimiter = self.delimiter.encode('utf-8')

        # Get the binary mode, e.g., 'float32'
        binary = self.h('SPINMOB_BINARY')

        # Number of bytes per element
        size = eval('_n.'+binary+'().itemsize', dict(_n=_n))

        # Location of first ckey: just past the second 'SPINMOB_BINARY'
        # marker (the one that ends the header).
        start = s.find(b'SPINMOB_BINARY',14) + 15

        # Continue until we reach the last character.
        while not start >= len(s):

            # Get the location of the end of the ckey
            stop = s.find(delimiter, start)

            # Woa, Nelly! We're at the end of the file.
            if stop == -1: break
            ckey = s[start:stop].decode('utf-8').strip()

            # Get the array shape (rest of the line after the ckey).
            start = stop+1
            stop  = s.find(b'\n', start)
            shape = eval(s[start:stop].strip())
            if type(shape) == int: length = shape
            else:                  length = _n.prod(_n.array(shape))

            # Get the data! (length elements of `size` bytes each)
            start = stop+1
            stop  = start+size*length
            self[ckey] = _n.frombuffer(s[start:stop], binary).reshape(shape)

            # Go to next ckey (skip the trailing newline).
            start = stop+1

    # Otherwise we have a text file to load.
    else:

        # Make sure first_data_line isn't still 'auto'
        # which happens if there's no data, or if it's a binary file
        if first_data_line == "auto" and not 'SPINMOB_BINARY' in self.hkeys:
            if not quiet: print("\ndatabox.load_file(): Could not find a line of pure data! Perhaps check the delimiter?")
            return self

        ##### at this point we've found the first_data_line,

        # look for the ckeys

        # special case: no header
        if first_data_line == 0: ckeys = []

        # start by assuming it's the previous line
        else: ckeys = lines[first_data_line-1].strip().split(self.delimiter)

        # count the number of actual data columns for comparison
        column_count = len(lines[first_data_line].strip().split(self.delimiter))

        # check to see if ckeys is equal in length to the
        # number of data columns. If it isn't, it's a false ckeys line
        if len(ckeys) >= column_count:

            # it is close enough
            # if we have too many column keys, mention it
            while len(ckeys) > column_count:
                extra = ckeys.pop(-1)
                if not quiet: print("Extra ckey: "+extra)

        else:
            # it is an invalid ckeys line. Generate our own!
            ckeys = []
            for m in range(0, column_count): ckeys.append("c"+str(m))

        # last step with ckeys: make sure they're all different!
        self.ckeys = []
        while len(ckeys):

            # remove the key
            ckey = ckeys.pop(0)

            # if there is a duplicate
            if (ckey in ckeys) or (ckey in self.ckeys):
                # increase the label index until it's unique
                n=0
                while (ckey+"_"+str(n) in ckeys) or (ckey+"_"+str(n) in self.ckeys): n+=1
                ckey = ckey+"_"+str(n)
            self.ckeys.append(ckey)

        # initialize the columns arrays
        # I did benchmarks and there's not much improvement by using numpy-arrays here.
        for label in self.ckeys: self.columns[label] = []

        # NOTE(review): _n.complex is removed in modern numpy (>=1.24);
        # this legacy module would need _n.complex128 there.
        # Python 2 format
        #if _sys.version_info[0] == 2:
        try:
            # replace 'i' with 'j' so complex numbers parse
            def fix(x): return str(x.replace('i','j'))

            # loop over the remaining data lines, converting to numbers
            z = _n.genfromtxt((fix(x) for x in lines[first_data_line:]),
                              delimiter=self.delimiter,
                              dtype=_n.complex)

        # Python 3 format (genfromtxt wants bytes rows)
        except:
            def fix(x): return bytearray(x.replace('i','j'), encoding='utf-8')

            # loop over the remaining data lines, converting to numbers
            z = _n.genfromtxt((fix(x) for x in lines[first_data_line:]),
                              delimiter=self.delimiter,
                              dtype=_n.complex)

        # genfromtxt returns a 1D array if there is only one data line.
        # highly confusing behavior, numpy!
        if len(_n.shape(z)) == 1:

            # check to make sure the data file contains only 1 column of data
            rows_of_data = len(lines) - first_data_line
            if rows_of_data == 1: z = _n.array([z])
            else:                 z = _n.array(z)

        # fix for different behavior of genfromtxt on single columns
        if len(z.shape) == 2: z = z.transpose()
        else:                 z = [z]

        # Add all the columns
        for n in range(len(self.ckeys)):

            # if any of the imaginary components are non-zero, use complex
            if _n.any(_n.imag(z[n])): self[n] = z[n]
            else:                     self[n] = _n.real(z[n])

    # Done with loading in the columns of data

    # now, as an added bonus, rename some of the obnoxious headers
    for k in self.obnoxious_ckeys:
        if k in self.columns:
            self.columns[self.obnoxious_ckeys[k]] = self.columns[k]

    return self
def save_file(self, path=None, filters='*.dat', force_extension=None, force_overwrite=False, header_only=False, delimiter='use current', binary=None):
    """
    This will save all the header info and columns to an ascii (or binary)
    file with the specified path.

    Parameters
    ----------
    path=None
        Path for saving the data. If None, this will bring up
        a save file dialog.
    filters='*.dat'
        File filter for the file dialog (for path=None)
    force_extension=None
        If set to a string, e.g., 'txt', it will enforce that the chosen
        filename will have this extension.
    force_overwrite=False
        Normally, if the file * exists, this will copy that
        to *.backup. If the backup already exists, this
        function will abort. Setting this to True will
        force overwriting the backup file.
    header_only=False
        Only output the header?
    delimiter='use current'
        This will set the delimiter of the output file
        'use current' means use self.delimiter
    binary=None
        Set to one of the allowed numpy dtypes, e.g., float32, float64,
        complex64, int32, etc. Setting binary=True defaults to float64.
        Note if the header contains the key SPINMOB_BINARY and binary=None,
        it will save as binary using the header specification.

    Returns
    -------
    self on success; False if the save dialog was aborted.
    """
    # Make sure there isn't a problem later with no-column databoxes
    if len(self)==0: header_only=True

    # This is the final path. We now write to a temporary file in the user
    # directory, then move it to the destination. This (hopefully) fixes
    # problems with sync programs.
    if path in [None]: path = _s.dialogs.save(filters, default_directory=self.directory)
    if path in ["", None]:
        print("Aborted.")
        return False

    # Force the extension (we do this here redundantly, because the user may have also
    # specified a path explicitly)
    if not force_extension is None:

        # In case the user put "*.txt" instead of just "txt"
        force_extension = force_extension.replace('*','').replace('.','')

        # If the file doesn't end with the extension, add it
        if not _os.path.splitext(path)[-1][1:] == force_extension:
            path = path + '.' + force_extension

    # Save the path for future reference
    self.path=path

    # if the path exists, make a backup
    # NOTE(review): despite the docstring, _os.rename silently replaces an
    # existing backup on POSIX (and raises on Windows) -- confirm intent.
    if _os.path.exists(path) and not force_overwrite:
        _os.rename(path,path+".backup")

    # get the delimiter
    if delimiter == "use current":
        if self.delimiter is None: delimiter = "\t"
        else:                      delimiter = self.delimiter

    # figure out the temporary path (home directory, unique-ish name from
    # the clock and a random number)
    temporary_path = _os.path.join(_s.settings.path_home, "temp-"+str(int(1e3*_time.time()))+'-'+str(int(1e9*_n.random.rand(1))))

    # open the temporary file
    f = open(temporary_path, 'w')

    # Override any existing binary if we're supposed to
    if binary in [False, 'text', 'Text', 'ASCII', 'csv', 'CSV']:
        self.pop_header('SPINMOB_BINARY', True)
        binary = None

    # If the binary flag is any kind of binary format, add the key
    if not binary in [None, False, 'text', 'Text', 'ASCII', 'csv', 'CSV']:
        self.h(SPINMOB_BINARY=binary)

    # Now use the header element to determine the binary mode
    if 'SPINMOB_BINARY' in self.hkeys:

        # Get the binary mode (we'll use this later)
        binary = self.pop_header('SPINMOB_BINARY')

        # If it's "True", default to float32
        if binary in ['True', True, 1]: binary = 'float32'

        # Write the special first key.
        f.write('SPINMOB_BINARY' + delimiter + binary + '\n')

    # Write the usual header
    for k in self.hkeys:
        h = self.h(k)

        # Convert arrays to lists so we can get all the numbers.
        if type(h) is _n.ndarray: h = h.tolist()

        # Write it: one line per header element, newlines flattened.
        f.write(k + delimiter + repr(h).replace('\n',' ') + "\n")
    f.write('\n')

    # if we're not just supposed to write the header
    if not header_only:

        # Normal ascii saving mode.
        if binary in [None, 'None', False, 'False']:

            # First check if any of the columns are more than 1D and complain
            alles_klar = True
            for n in range(len(self)):
                if len(_n.array(self[n]).shape) != 1: alles_klar = False
            if not alles_klar: print('WARNING: You must save in binary mode if your columns have more than 1 dimension.')

            # write the ckeys
            elements = []
            for ckey in self.ckeys: elements.append(str(ckey).replace(delimiter,'_'))
            f.write(delimiter.join(elements) + "\n")

            # now loop over the data rows (the 0th column sets the count)
            for n in range(0, len(self[0])):

                # loop over each column
                elements = []
                for m in range(0, len(self.ckeys)):

                    # write the data if there is any, otherwise, placeholder
                    if n < len(self[m]):
                        elements.append(str(self[m][n]))
                    else:
                        elements.append('_')
                f.write(delimiter.join(elements) + "\n")

        # Binary mode
        else:

            # Announce that we're done with the header. It's binary time
            f.write('SPINMOB_BINARY\n')

            # Loop over the ckeys
            for n in range(len(self.ckeys)):

                # Get the binary data string
                # NOTE(review): ndarray.tostring() is deprecated in favor of
                # tobytes() in modern numpy; legacy module kept as-is.
                data_string = _n.array(self[n]).astype(binary).tostring()

                # Write the column as
                #   ckey + delimiter + shape + \n + datastring + \n
                # reopening the file in binary append mode for the raw bytes.
                f.write(str(self.ckeys[n]).replace(delimiter,'_')
                        + delimiter + str(_n.array(self[n]).shape) + '\n')
                f.close()
                f = open(temporary_path, 'ab')
                f.write(data_string)
                f.close()
                f = open(temporary_path, 'a')
                f.write('\n')

    # Close whichever handle is still open (covers all branches above).
    f.close()

    # now move it
    _shutil.move(temporary_path, path)

    return self
def set_binary_mode(self, binary='float64'):
    """
    Sets the save_file() mode to binary.

    Parameters
    ----------
    binary='float64'
        Can be any of these objects found in numpy:
        'float16', 'float32', 'float64', 'float128' (if supported),
        'int8', 'int16', 'int32', 'int64', 'int128' (if supported),
        'complex64', 'complex128', 'complex256' (if supported).
        If False, this will set the save format to text mode.
        If True, defaults to float64.
    """
    # True => default dtype; any other truthy value is used verbatim;
    # falsy clears the flag (text mode).
    if binary is True:
        self.h(SPINMOB_BINARY='float64')
    elif binary:
        self.h(SPINMOB_BINARY=binary)
    else:
        self.pop_header('SPINMOB_BINARY', ignore_error=True)
    return self
def set_text_mode(self):
    """
    Sets the save_file() format to text mode. Same as
    self.set_binary_mode(False).
    """
    # Delegate; set_binary_mode(False) returns self.
    return self.set_binary_mode(False)
def get_row(self, n):
    """
    Returns a list of the n'th row (starting at 0) from all columns.

    Parameters
    ----------
    n
        Index of the row to fetch.
    """
    # Collect the n'th element of each column, in ckey order.
    return [self[ckey][n] for ckey in self.ckeys]
def get_data_point(self, n):
    """
    Obsolete. Please use get_row() instead.
    """
    # Deprecation notice, then delegate.
    print('OBSOLETE: Please use databox.get_row() instead of databox.get_data_point()')
    return self.get_row(n)
def pop_row(self, n):
    """
    Removes and returns the n'th row (starting at 0) from all columns.

    Parameters
    ----------
    n
        Index of the row to pop.
    """
    popped = []
    for ckey in self.ckeys:

        # Work on a list copy so we can pop, then store the column back.
        values = list(self.c(ckey))
        popped.append(values.pop(n))
        self.insert_column(_n.array(values), ckey)

    return popped
def pop_data_point(self, n):
    """
    Obsolete. Please use pop_row() instead.
    """
    # Deprecation notice, then delegate.
    print('OBSOLETE: Please use databox.pop_row() instead of databox.pop_data_point()')
    return self.pop_row(n)
def insert_row(self, new_data, index=None, ckeys=None):
    """
    Inserts a row at the specified index.

    Parameters
    ----------
    new_data
        A list or array of new data points, one for each column.
    index=None
        Where to insert the point(s) in each column. None => append.
    ckeys=None
        An optional list (of the same size as new_data) of ckeys. If this
        list does not match the existing ckeys, it will clear the columns
        and rebuild them, rather than overwriting.
    """
    # If the caller supplied ckeys that don't match ours, rebuild the
    # columns from scratch with the new names.
    if ckeys is not None:

        # Normalize to a list so the comparison with self.ckeys is fair.
        ckeys = list(ckeys)
        if not self.ckeys == ckeys:
            self.clear_columns()
            for k in ckeys: self[k] = []

    # Sanity check: the row must have one element per column (unless
    # there are no columns yet).
    if not len(new_data) == len(self.columns) and not len(self.columns) == 0:
        print("ERROR: new_data must have as many elements as there are columns.")
        return self

    # No columns yet: each element of the row seeds a new column.
    elif len(self.columns) == 0:
        for i in range(len(new_data)): self[i] = [new_data[i]]

    # Usual case: insert (or append) one value into each column.
    else:
        for i in range(len(new_data)):

            # Convert to a list so we can insert, then back to an array.
            values = list(self[i])
            if index is None: values.append(new_data[i])
            else:             values.insert(index, new_data[i])
            self[i] = _n.array(values)

    return self
def insert_data_point(self, *a, **kw):
    """
    Obsolete. Please use insert_row() instead.
    """
    # Deprecation notice, then delegate all arguments.
    print('OBSOLETE: Please use databox.insert_row() instead of databox.insert_data_point()')
    return self.insert_row(*a, **kw)
def append_row(self, new_data, ckeys=None, history=0):
    """
    Appends the supplied row (list) to the column(s).

    Parameters
    ----------
    new_data
        A list or array of new data, one for each column.
    ckeys=None
        An optional list (of the same size as new_data) of ckeys. If this
        list does not match the existing ckeys, it will clear the columns
        and rebuild them, rather than overwriting.
    history=0
        If a positive integer is specified, after appending the row this
        pops rows off the front until the length of the 0th column equals
        the specified value.
    """
    # Append via insert_row (index=None means "append").
    self.insert_row(new_data, None, ckeys)

    # Trim from the front to enforce the history limit.
    if history > 0:
        while len(self[0]) > history:
            self.pop_row(0)

    return self
def append_data_point(self, *a, **kw):
    """
    Obsolete. Use append_row() instead.
    """
    # Deprecation notice, then delegate all arguments.
    print('OBSOLETE: Please use append_row() instead of append_data_point().')
    return self.append_row(*a, **kw)
def execute_script(self, script, g=None):
    """
    Runs a script, returning the result.

    Parameters
    ----------
    script
        String script to be evaluated (see below). May also be a (nested)
        list of scripts, in which case a list of results is returned.
    g=None
        Optional dictionary of additional globals for the script evaluation.
        These will automatically be inserted into self.extra_globals.

    Usage
    -----
    Scripts are of the form:

        "3.0 + x/y - d[0] where x=3.0*c('my_column')+h('setting'); y=d[1]"

    By default, "d" refers to the databox object itself, giving access to
    everything and enabling complete control over the universe. Meanwhile,
    c() and h() give quick reference to d.c() and d.h() to get columns and
    header lines. Additionally, these scripts can see all of the numpy
    functions like sin, cos, sqrt, etc.

    If you would like access to additional globals in a script,
    there are a few options in addition to specifying the g parametres.
    You can set self.extra_globals to the appropriate globals dictionary
    or add globals using self.insert_global(). Setting g=globals() will
    automatically insert all of your current globals into this databox
    instance.

    There are a few shorthand scripts available as well. You can simply type
    a column name such as 'my_column' or a column number like 2. However, I
    only added this functionality as a shortcut, and something like
    "2.0*a where a=my_column" will not work unless 'my_column is otherwise
    defined. Use "2.0*a where a=c('my_column')" instead.
    """
    # Merge any user-supplied globals for the eventual eval() call.
    # (Was "if not g==None"; identity comparison is the correct idiom
    # for None checks.)
    if g is not None: self.extra_globals.update(g)

    # If the script is not a list of scripts, return the script value.
    # This is the termination of the recursive call below.
    if not _s.fun.is_iterable(script):

        # Special case: nothing to evaluate.
        if script is None: return None

        # Get the expression and variables dictionary.
        [expression, v] = self._parse_script(script)

        # If there was a problem parsing the script, bail out.
        if v is None:
            print("ERROR: Could not parse '"+script+"'")
            return None

        # Evaluate with numpy et al. plus the script's own variables.
        g = self._globals()
        g.update(v)
        return eval(expression, g)

    # Otherwise, this is a list of (lists of) scripts: recurse on each.
    output = []
    for s in script: output.append(self.execute_script(s))
    return output

# Define this so you can quickly call a script
__call__ = execute_script
def _parse_script(self, script, n=0):
"""
This takes a script such as "a/b where a=c('current'), b=3.3" and returns
["a/b", {"a":self.columns["current"], "b":3.3}]
You can also just use an integer for script to reference columns by number
or use the column label as the script.
n is for internal use. Don't use it. In fact, don't use this function, user.
"""
if n > 1000:
print("This script ran recursively 1000 times!")
a = input("<enter> or (q)uit: ")
if a.strip().lower() in ['q', 'quit']:
script = None
if script is None: return [None, None]
# check if the script is simply an integer
if type(script) in [int,int]:
if script<0: script = script+len(self.ckeys)
return ["___"+str(script), {"___"+str(script):self[script]}]
# the scripts would like to use calls like "h('this')/3.0*c('that')",
# so to make eval() work we should add these functions to a local list
# first split up by "where"
split_script = script.split(" where ")
########################################
# Scripts without a "where" statement:
########################################
# if it's a simple script, like "column0" or "c(3)/2.0"
if len(split_script) is 1:
if self.debug: print("script of length 1")
# try to evaluate the script
# first try to evaluate it as a simple column label
if n==0 and script in self.ckeys:
# only try this on the zero'th attempt
# if this is a recursive call, there can be ambiguities if the
# column names are number strings
return ['___', {'___':self[script]}]
# Otherwise, evaluate it.
try:
b = eval(script, self._globals())
return ['___', {'___':b}]
except:
print()
print("ERROR: Could not evaluate '"+str(script)+"'")
return [None, None]
#######################################
# Full-on fancy scripts
#######################################
# otherwise it's a complicated script like "c(1)-a/2 where a=h('this')"
# tidy up the expression
expression = split_script[0].strip()
# now split the variables list up by ,
varsplit = split_script[1].split(';')
# loop over the entries in the list of variables, storing the results
# of evaluation in the "stuff" dictionary
stuff = dict()
for var in varsplit:
# split each entry by the "=" sign
s = var.split("=")
if len(s) == 1:
print(s, "has no '=' in it")
return [None, None]
# tidy up into "variable" and "column label"
v = s[0].strip()
c = s[1].strip()
# now try to evaluate c, given our current globbies
# recursively call this sub-script. At the end of all this mess
# we want the final return value to be the first expression
# and a full dictionary of variables to fill it
[x,y] = self._parse_script(c, n+1)
# if it's not working, just quit out.
if y is None: return [None, None]
stuff[v] = y[x]
# at this point we've found or generated the list