"""Plot held-out accuracy of a decision tree as ``max_depth`` grows.

A synthetic classification problem is generated, its rows are randomly
split into a training part and a held-out part, and one
``DecisionTreeClassifier`` is fitted for every candidate depth from 1 to
``n_features``. Two figures are drawn: the full sweep, and a zoom on the
first ``N`` depths.
"""
import matplotlib.pyplot as plt
import numpy as np
from sklearn import datasets
from sklearn.tree import DecisionTreeClassifier

n_samples = 750
n_features = 200


def _plot_accuracy(depths, scores):
    """Draw accuracy against max depth on a new figure; return ``(fig, ax)``."""
    fig, ax = plt.subplots(figsize=(7, 5))
    ax.plot(depths, scores, color='k')
    ax.set_title("Decision Tree Accuracy")
    ax.set_ylabel("% Correct")
    ax.set_xlabel("Max Depth")
    return fig, ax


X, y = datasets.make_classification(n_samples, n_features, n_informative=5)

# Boolean row mask: `p` gives the True/False proportions, so each row is
# drawn as a training row with probability 0.75 and held out otherwise.
training = np.random.choice([True, False], p=[.75, .25], size=len(y))

# Report how many rows actually landed in the training part, and the
# resulting fraction (the split is random, so it is only roughly 0.75).
n_train = int(training.sum())
print(n_train, n_train / n_samples)

# The split does not change between fits, so slice the data once.
X_train, y_train = X[training], y[training]
X_test, y_test = X[~training], y[~training]

depths = range(1, n_features + 1)
accuracies = []
for depth in depths:
    dt = DecisionTreeClassifier(max_depth=depth)
    dt.fit(X_train, y_train)
    preds = dt.predict(X_test)
    # Fraction of held-out rows predicted correctly at this depth.
    accuracies.append((preds == y_test).mean())

# Full sweep over every depth.
f, ax = _plot_accuracy(depths, accuracies)

# Zoom on the first N depths.
N = 15
f, ax = _plot_accuracy(depths[:N], accuracies[:N])

# Figure.show() does not run a GUI event loop, so in a plain script the
# windows may vanish immediately; a single blocking plt.show() displays
# both figures.
plt.show()

# Disabled follow-up experiment, kept for reference. Original note: older
# versions do not have this parameter; it is handy for checking which
# features are important.
# NOTE(review): `dt_ci` is created but `dt` is the estimator that is fitted
# and read below -- presumably `dt_ci` was intended; confirm before enabling.
# NOTE(review): check whether the installed scikit-learn still accepts
# `compute_importances` -- TODO confirm.
#
# dt_ci = DecisionTreeClassifier(compute_importances=True)
# dt.fit(X, y)
# ne0 = dt.feature_importances_ != 0
# y_comp = dt.feature_importances_[ne0]
# x_comp = np.arange(len(dt.feature_importances_))[ne0]
# f, ax = plt.subplots(figsize=(7, 5))
# ax.bar(x_comp, y_comp)
# f.show()