We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
# Split the dataset
def splitdataset(dataset, axis, value):
    """Partition ``dataset`` on one column and drop that column.

    Args:
        dataset: Iterable of rows; each row is an indexable sequence
            (e.g. a list of feature values followed by the class label).
        axis: Index of the column to test. Negative indices count from the
            end of the row, as with ordinary Python indexing.
        value: Value the column is compared against with ``==``.

    Returns:
        A tuple ``(retdataset_is, retdataset_not)``. The first list holds the
        rows whose ``axis`` column equals ``value``; the second holds all
        other rows. In both lists the ``axis`` column has been removed and
        each row is a new list (the input rows are not modified).

    Raises:
        IndexError: If ``axis`` is out of range for a row.
    """
    retdataset_is = []   # rows where row[axis] == value
    retdataset_not = []  # rows where row[axis] != value
    for featVec in dataset:
        matches = featVec[axis] == value  # also validates that axis is in range
        # Normalise a negative axis before slicing. Without this, axis=-1
        # turns ``featVec[axis + 1:]`` into ``featVec[0:]`` and the "reduced"
        # row would contain almost the whole row twice instead of dropping
        # the column.
        idx = axis if axis >= 0 else axis + len(featVec)
        reducedfeatVec = list(featVec[:idx]) + list(featVec[idx + 1:])
        if matches:
            retdataset_is.append(reducedfeatVec)
        else:
            retdataset_not.append(reducedfeatVec)
    return retdataset_is, retdataset_not
# CART algorithm
def _gini_impurity(labels):
    """Return the Gini impurity ``1 - sum(p_k ** 2)`` of a list of labels.

    An empty list has impurity 0.0, so an empty side of a split contributes
    nothing instead of causing a division by zero.
    """
    total = len(labels)
    if total == 0:
        return 0.0
    counts = {}
    for label in labels:
        counts[label] = counts.get(label, 0) + 1
    return 1.0 - sum((c / float(total)) ** 2 for c in counts.values())


def CART_chooseBestFeatureToSplit(dataset):
    """Pick the feature column with the lowest accumulated Gini score.

    Each row of ``dataset`` is a sequence of feature values followed by the
    class label in the last position. For every feature and every distinct
    value of that feature, the rows are split into "equals value" and
    "does not equal value"; the size-weighted Gini impurity of that binary
    split is added to the feature's score. The score of each feature is
    printed, and the index of the feature with the smallest score is
    returned (the lowest index wins ties).

    Args:
        dataset: Non-empty list of rows (features..., label).

    Returns:
        Index of the selected feature, or -1 if the rows contain no feature
        columns.

    Raises:
        IndexError: If ``dataset`` is empty.
    """
    numFeatures = len(dataset[0]) - 1  # last column is the class label
    numRows = float(len(dataset))
    bestGini = float("inf")
    bestFeature = -1

    for i in range(numFeatures):
        uniqueVals = set(example[i] for example in dataset)
        gini = 0.0
        # NOTE(review): the score is the SUM of the binary-split Gini over
        # all values of the feature (kept from the original). Textbook CART
        # takes the minimum over candidate splits — confirm which is intended.
        for value in uniqueVals:
            labels_is = [example[-1] for example in dataset if example[i] == value]
            labels_not = [example[-1] for example in dataset if example[i] != value]
            # Weighted impurity of the two sides. ``labels_not`` is empty when
            # the feature has a single distinct value; _gini_impurity handles
            # that case without dividing by zero.
            gini += (len(labels_is) / numRows) * _gini_impurity(labels_is) + \
                    (len(labels_not) / numRows) * _gini_impurity(labels_not)
        print(u"CART中第%d个特征的基尼值为:%.3f" % (i, gini))
        if gini < bestGini:
            bestGini = gini
            bestFeature = i
    return bestFeature
The text was updated successfully, but these errors were encountered:
No branches or pull requests
#划分数据集
def splitdataset(dataset,axis,value):
retdataset_is =[] #创建返回是value的数据集列表
retdataset_not = [] #创建返回不是value的数据列表
for featVec in dataset:#抽取符合划分特征的值
if featVec[axis]==value:
reducedfeatVec_is=featVec[:axis] #去掉axis特征
reducedfeatVec_is.extend(featVec[axis+1:])#将符合条件的特征添加到返回的数据集列表
retdataset_is.append(reducedfeatVec_is)
#CART算法
def CART_chooseBestFeatureToSplit(dataset):
numFeatures = len(dataset[0]) - 1
bestGini = 999999.0
bestFeature = -1
The text was updated successfully, but these errors were encountered: