From df8f1c0dfbf8c9f9c24b2f484b939af180be749b Mon Sep 17 00:00:00 2001
From: cpaulson
Date: Sat, 18 Jul 2015 13:44:07 +0100
Subject: [PATCH] added a cross-validation example

---
 examples/2D_leave_n_out.py        | 59 +++++++++++++++++++++++++++++++
 examples/2d_regression_Kriging.py | 27 ++++++++++----
 pyKriging/CrossValidation.py      | 26 +++++++++++---
 pyKriging/krige.py                | 10 ++++--
 pyKriging/regressionkrige.py      |  4 +--
 pyKriging/testfunctions.py        |  2 +-
 pyKriging/utilities.py            |  2 +-
 7 files changed, 112 insertions(+), 18 deletions(-)
 create mode 100644 examples/2D_leave_n_out.py

diff --git a/examples/2D_leave_n_out.py b/examples/2D_leave_n_out.py
new file mode 100644
index 0000000..565c1ab
--- /dev/null
+++ b/examples/2D_leave_n_out.py
@@ -0,0 +1,59 @@
+__author__ = 'cpaulson'
+import pyKriging
+from pyKriging.krige import kriging
+from pyKriging.samplingplan import samplingplan
+from pyKriging.CrossValidation import Cross_Validation
+from pyKriging.utilities import saveModel
+
+# The Kriging model starts by defining a sampling plan; we use an optimal Latin Hypercube here
+sp = samplingplan(2)
+X = sp.optimallhc(5)
+
+# Next, we define the problem we would like to solve
+testfun = pyKriging.testfunctions().branin
+
+# We generate our observed values based on our sampling plan and the test function
+y = testfun(X)
+
+print 'Setting up the Kriging Model'
+cvMSE = []
+# Now that we have our initial data, we can create an instance of a kriging model
+k = kriging(X, y, testfunction=testfun, name='simple', testPoints=300)
+k.train(optimizer='ga')
+k.snapshot()
+# cv = Cross_Validation(k)
+# cvMSE.append( cv.leave_n_out(q=5)[0] )
+
+k.plot()
+for i in range(15):
+    print i
+    newpoints = k.infill(1)
+    for point in newpoints:
+        # print 'Adding point {}'.format(point)
+        k.addPoint(point, testfun(point)[0])
+    k.train(optimizer='pso')
+    k.snapshot()
+    # cv = Cross_Validation(k)
+    # cvMSE.append( cv.leave_n_out(q=5)[0] )
+k.plot()
+
+
+
+# saveModel(k, 'crossValidation.plk')
+
+# #And plot the model
+
+print 'Now plotting final results...'
+# k.plot()
+
+
+print k.testPoints
+print k.history['points']
+print k.history['rsquared']
+print k.history['avgMSE']
+print cvMSE
+from matplotlib import pylab as plt
+plt.plot(range(len(k.history['rsquared'])), k.history['rsquared'])
+plt.plot(range(len(cvMSE)), cvMSE)
+plt.show()
+
diff --git a/examples/2d_regression_Kriging.py b/examples/2d_regression_Kriging.py
index da050dc..a810465 100644
--- a/examples/2d_regression_Kriging.py
+++ b/examples/2d_regression_Kriging.py
@@ -5,9 +5,11 @@
 from pyKriging.regressionkrige import regression_kriging
 from pyKriging.samplingplan import samplingplan
 
+
+from pyKriging.krige import kriging
 # The Kriging model starts by defining a sampling plan, we use an optimal Latin Hypercube here
 sp = samplingplan(2)
-X = sp.optimallhc(25)
+X = sp.optimallhc(30)
 
 # Next, we define the problem we would like to solve
 testfun = pyKriging.testfunctions().branin_noise
@@ -16,25 +18,36 @@
 y = testfun(X)
 print X, y
 
+testfun = pyKriging.testfunctions().branin
+
+
 print 'Setting up the Kriging Model'
 # Now that we have our initial data, we can create an instance of a kriging model
 k = regression_kriging(X, y, testfunction=testfun, name='simple', testPoints=250)
-k.train(optimizer='ga')
+k.train(optimizer='pso')
+k1 = kriging(X, y, testfunction=testfun, name='simple', testPoints=250)
+k1.train(optimizer='pso')
 print k.Lambda
-# k.snapshot()
-#
-for i in range(5):
+k.snapshot()
+
+
+for i in range(1):
     newpoints = k.infill(5)
     for point in newpoints:
         print 'Adding point {}'.format(point)
-        k.addPoint(point, testfun(point)[0])
+        newValue = testfun(point)[0]
+        k.addPoint(point, newValue)
+        k1.addPoint(point, newValue)
     k.train()
+    k1.train()
 
 # k.snapshot()
 #
 #
 # #And plot the model
 
 print 'Now plotting final results...'
-k.plot()
+print k.Lambda
+k.plot(show=False)
+k1.plot()
diff --git a/pyKriging/CrossValidation.py b/pyKriging/CrossValidation.py
index 0dfe477..4303ad5 100644
--- a/pyKriging/CrossValidation.py
+++ b/pyKriging/CrossValidation.py
@@ -3,21 +3,24 @@
 """
 import numpy as np
 from matplotlib import pyplot as plt
-from pyKrige.krige import kriging
+import pyKriging
+from pyKriging.krige import kriging
+from pyKriging.utilities import *
 import random
 import scipy.stats as stats
 
 
 class Cross_Validation():
 
-    def __init__(self, X, y, name):
+    def __init__(self, model, name=None):
         """
         X- sampling plane
         y- Objective function evaluations
         name- the name of the model
         """
-        self.X = X
-        self.y = y
+        self.model = model
+        self.X = self.model.X
+        self.y = self.model.y
         self.n, self.k = np.shape(self.X)
         self.predict_list, self.predict_varr, self.scvr = [], [], []
         self.name = name
@@ -209,4 +212,19 @@ def QQ_plot(self):
         plt.ylabel('Standard quantile')
         plt.show()
 
+    def leave_n_out(self, q=5):
+        '''
+        :param q: the number of groups to split the model data into
+        :return: mean and standard deviation of the squared prediction errors at the held-out points
+        '''
+        mseArray = []
+        for i in splitArrays(self.model, q):  # i = (trainX, trainy, testX, testy)
+            testk = kriging(i[0], i[1])
+            testk.train()
+            for j in range(len(i[2])):
+                mseArray.append(mse(i[3][j], testk.predict(i[2][j])))
+            del testk
+        return np.average(mseArray), np.std(mseArray)
+
+
 ## Example Use Case:
diff --git a/pyKriging/krige.py b/pyKriging/krige.py
index bda14a3..9f174ca 100644
--- a/pyKriging/krige.py
+++ b/pyKriging/krige.py
@@ -18,6 +18,7 @@
 import math as m
 
 
+
 class kriging(matrixops):
     def __init__(self, X, y, testfunction=None, name='', testPoints=None, **kwargs):
         self.X = copy.deepcopy(X)
@@ -36,7 +37,7 @@ def __init__(self, X, y, testfunction=None, name='', testPoints=None, **kwargs):
         self.updateData()
         self.updateModel()
-        self.thetamin = 1e-4
+        self.thetamin = 1e-5
         self.thetamax = 100
         self.pmin = 1
         self.pmax = 2
 
@@ -51,6 +52,7 @@ def __init__(self, X, y, testfunction=None, name='', testPoints=None, **kwargs):
         self.history['adjrsquared'] = [0]
         self.history['chisquared'] = [1000]
         self.history['lastPredictedPoints'] = []
+        self.history['avgMSE'] = []
         if testPoints:
             self.history['pointData'] = []
             self.testPoints = self.sp.rlh(testPoints)
@@ -116,10 +118,10 @@ def normalizeData(self):
         for i in range(self.k):
             self.normRange.append([min(self.X[:, i]), max(self.X[:, i])])
 
-        print self.X
+        # print self.X
         for i in range(self.n):
             self.X[i] = self.normX(self.X[i])
-        print self.X
+        # print self.X
 
         self.ynormRange.append(min(self.y))
         self.ynormRange.append(max(self.y))
@@ -688,6 +690,8 @@ def snapshot(self):
         self.history['theta'].append(copy.deepcopy(self.theta))
         self.history['p'].append(copy.deepcopy(self.pl))
 
+        self.history['avgMSE'].append(self.calcuatemeanMSE(points=self.testPoints)[0])
+
         currentPredictions = []
         if self.history['pointData']!=None:
             for pointprim in self.history['pointData']:
diff --git a/pyKriging/regressionkrige.py b/pyKriging/regressionkrige.py
index c7d504b..d3fc0b5 100644
--- a/pyKriging/regressionkrige.py
+++ b/pyKriging/regressionkrige.py
@@ -167,9 +167,9 @@ def updateModel(self):
         try:
             self.regupdatePsi()
         except Exception, err:
-            # pass
+            pass
             # print Exception, err
-            raise Exception("bad params")
+            # raise Exception("bad params")
 
     def predict(self, X, norm=True):
         '''
diff --git a/pyKriging/testfunctions.py b/pyKriging/testfunctions.py
index bfced2c..cd59597 100644
--- a/pyKriging/testfunctions.py
+++ b/pyKriging/testfunctions.py
@@ -84,7 +84,7 @@ def branin_noise(self, X):
         noiseFree = ((a*( X2 - b*X1**2 + c*X1 - d )**2 + e*(1-ff)*np.cos(X1) + e)+5*x)
         withNoise=[]
         for i in noiseFree:
-            withNoise.append(i + np.random.standard_normal()*7)
+            withNoise.append(i + np.random.standard_normal()*15)
         return np.array(withNoise)
 
 
diff --git a/pyKriging/utilities.py b/pyKriging/utilities.py
index dcdb671..47b399b 100644
--- a/pyKriging/utilities.py
+++ b/pyKriging/utilities.py
@@ -9,7 +9,7 @@ def norm(x):
     return x-min(x)
 
 def saveModel(model, filePath):
-    pickle.dump(model, open(filePath, 'w'))
+    pickle.dump(model, open(filePath, 'w'), byref=True)
 
 def loadModel(filePath):
     return pickle.load(open(filePath,'r'))
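--
A minimal usage sketch of the new leave-n-out API added above (not part of
the applied diff; the sample sizes and the branin test function are
illustrative choices, not fixed by the patch):

    import pyKriging
    from pyKriging.krige import kriging
    from pyKriging.samplingplan import samplingplan
    from pyKriging.CrossValidation import Cross_Validation

    # Build and train a small kriging model of the branin test function
    sp = samplingplan(2)
    X = sp.optimallhc(20)
    testfun = pyKriging.testfunctions().branin
    y = testfun(X)
    k = kriging(X, y, testfunction=testfun)
    k.train()

    # leave_n_out splits the model data into q groups, retrains a kriging
    # model with each group held out, and scores predictions at the
    # held-out points; it returns the mean and std of the squared errors
    cv = Cross_Validation(k)
    avgMSE, stdMSE = cv.leave_n_out(q=5)
    print 'leave-n-out MSE: {} +/- {}'.format(avgMSE, stdMSE)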