Opis klasifikatora moguće je pročitati u scikit-learn dokumentaciji za DummyClassifier.
Dodatni materijali:
import pandas as pd
from sklearn import metrics
from sklearn.dummy import DummyClassifier
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
# Load the iris dataset and hold out 33% of the samples as a test set.
# random_state is fixed so the split is the same on every run.
iris = load_iris()
(train_features, test_features, train_target, test_target) = train_test_split(
    iris.data, iris.target, test_size=0.33, random_state=42
)

# Baseline model: always predicts the most frequent class seen in training.
baseline = DummyClassifier(strategy="most_frequent")
baseline.fit(train_features, train_target)

# Score the baseline on the held-out samples.
predictions = baseline.predict(test_features)
baseline_accuracy = metrics.accuracy_score(test_target, predictions)

# Interpolate the value in the f-string; the original used an f-string with
# no placeholder plus a second print argument, which emitted a double space.
print(f"Baseline accuracy: {baseline_accuracy}")
Baseline accuracy: 0.3
# Compare the accuracy of every DummyClassifier strategy on the same split.
strategies = ['constant', 'most_frequent', 'stratified', 'prior', 'uniform']
baseline_arguments = [{'strategy': s} for s in strategies]
# The 'constant' strategy also needs the class label it should always predict.
baseline_arguments[0]['constant'] = 0

accuracy = []
for n in baseline_arguments:
    baseline = DummyClassifier(**n)
    baseline.fit(train_features, train_target)
    predictions = baseline.predict(test_features)
    # Append one score per strategy. Rebinding `accuracy` here would leave
    # only the last score, which the DataFrame would broadcast to every row.
    accuracy.append(metrics.accuracy_score(test_target, predictions))

# NOTE(review): 'stratified' and 'uniform' generate random predictions, so
# their scores can change between runs unless random_state is passed.
# `display` is assumed to come from the notebook (IPython) environment.
display(pd.DataFrame({'accuracy': accuracy}, index=strategies))
Loading...