Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

CART算法是不是应该改成这样?大佬们批评指正! #10

Open
Linjian-PA opened this issue Jan 14, 2021 · 0 comments
Open

Comments

@Linjian-PA
Copy link

#划分数据集
def splitdataset(dataset, axis, value):
    """Partition rows by whether column ``axis`` equals ``value``.

    Each returned row is a new list holding the original row with the
    ``axis`` column removed; the input rows are not modified.

    Args:
        dataset: Iterable of rows (lists or tuples) indexable by ``axis``.
        axis: Index of the column to test and drop. Negative indices count
            from the end of the row, as in normal Python indexing.
        value: Value the ``axis`` column is compared against with ``==``.

    Returns:
        A ``(matching, non_matching)`` tuple of lists of reduced rows, each
        preserving the order of ``dataset``.

    Raises:
        IndexError: If ``axis`` is out of range for a row.
    """
    retdataset_is = []   # rows whose axis column equals value
    retdataset_not = []  # rows whose axis column differs from value
    for featVec in dataset:
        matches = featVec[axis] == value
        # Resolve a negative axis to its positive position first: with the
        # raw value, ``featVec[axis + 1:]`` for axis == -1 is ``featVec[0:]``
        # and the whole row would be appended instead of the (empty) tail.
        column = axis if axis >= 0 else axis + len(featVec)
        reduced = list(featVec[:column]) + list(featVec[column + 1:])
        if matches:
            retdataset_is.append(reduced)
        else:
            retdataset_not.append(reduced)
    return retdataset_is, retdataset_not

#CART算法
def CART_chooseBestFeatureToSplit(dataset):
    """Pick the feature column with the lowest summed binary-split Gini score.

    Every column except the last is treated as a feature; the last column
    is the class label. For each feature, each distinct value ``v`` defines
    a binary split of the rows into ``feature == v`` and ``feature != v``.
    The split's score is the size-weighted Gini impurity of the two sides,
    and the feature's score is the sum of those scores over all distinct
    values. The score of each feature is printed.

    Args:
        dataset: Sequence of rows; feature values must be hashable.

    Returns:
        Index of the feature with the smallest score (the lowest index wins
        ties), or ``-1`` when the dataset is empty or has no feature columns.
    """
    # Function-scope import keeps the block self-contained.
    from collections import Counter

    if not dataset:
        return -1

    numFeatures = len(dataset[0]) - 1
    total_rows = float(len(dataset))
    all_labels = Counter(row[-1] for row in dataset)
    bestGini = float("inf")
    bestFeature = -1

    for i in range(numFeatures):
        # One pass over the rows collects the class counts for every value
        # of this feature, instead of re-splitting the dataset per value.
        labels_by_value = {}
        for row in dataset:
            labels_by_value.setdefault(row[i], Counter())[row[-1]] += 1

        gini = 0.0
        for labels_is in labels_by_value.values():
            # Counter subtraction drops zero counts, so the complement may
            # be empty when the feature has a single distinct value.
            labels_not = all_labels - labels_is
            p_is = sum(labels_is.values()) / total_rows
            p_not = sum(labels_not.values()) / total_rows
            gini += (p_is * _gini_impurity(labels_is)
                     + p_not * _gini_impurity(labels_not))

        print(u"CART中第%d个特征的基尼值为:%.3f"%(i,gini))
        if gini < bestGini:
            bestGini = gini
            bestFeature = i
    return bestFeature


def _gini_impurity(label_counts):
    """Return the Gini impurity for a mapping of class label -> row count."""
    size = sum(label_counts.values())
    if size == 0:
        # An empty side carries zero weight; avoid dividing by zero.
        return 0.0
    return 1.0 - sum((count / float(size)) ** 2
                     for count in label_counts.values())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

No branches or pull requests

1 participant