Transform data and fit decision tree
This commit is contained in:
parent
e232b49424
commit
5f9f4a6c7f
1 changed files with 37 additions and 3 deletions
|
|
@ -1,8 +1,42 @@
|
|||
import numpy as np
|
||||
import pandas
|
||||
from sklearn.impute import SimpleImputer
|
||||
from sklearn.pipeline import make_pipeline
|
||||
from sklearn import tree
|
||||
from sklearn.preprocessing import OrdinalEncoder
|
||||
from sklearn.tree import export_text
|
||||
|
||||
def read_excel(path: str):
|
||||
# Placeholder string that read_excel replaces with NaN in the "Messpunkt"
# columns before converting them to float.
NO_DATA: str = "NO DATA"
|
||||
|
||||
def read_excel(path: str, target_column: str):
    """Load a spreadsheet and split it into typed features and a target.

    Feature columns whose name starts with ``"Messpunkt"`` are converted to
    ``float64`` after the ``NO_DATA`` placeholder has been replaced by NaN.
    Every other feature column is converted to pandas' ``string`` dtype.

    Args:
        path: Location of the Excel workbook, passed to ``pandas.read_excel``.
        target_column: Name of the column holding the labels. It is removed
            from the features and returned separately.

    Returns:
        A tuple ``(X, y, feature_names)``: the feature frame, the target
        column, and the index of feature column names (target excluded).

    Raises:
        KeyError: If ``target_column`` is not a column of the workbook.
    """
    data = pandas.read_excel(path)
    # drop() returns a new frame, so the conversions below never touch `data`.
    X = data.drop(target_column, axis=1)
    y = data[target_column]
    # The target is already excluded here, so the loop needs no extra guard.
    feature_names = data.columns.drop(target_column)

    for feat in feature_names:
        # Headers read from a workbook are not guaranteed to be strings.
        if str(feat).startswith("Messpunkt"):
            # Convert to numerical value
            X[feat] = X[feat].replace(to_replace=NO_DATA, value=np.nan)
            X[feat] = X[feat].astype('float64')
        else:
            # Convert to categorical value
            X[feat] = X[feat].astype('string')

    return X, y, feature_names
|
||||
|
||||
def classify(X, y, feature_names=None):
    """Fit a decision tree on ``X``/``y`` and print it as text rules.

    Args:
        X: Feature matrix accepted by ``DecisionTreeClassifier.fit``.
        y: Target labels, one per row of ``X``.
        feature_names: Optional names for the columns of ``X``, used to label
            the printed rules. Any iterable of names is accepted.
    """
    clf = tree.DecisionTreeClassifier()
    clf = clf.fit(X, y)
    # export_text has historically documented this argument as a plain list
    # of str; a pandas Index or ndarray is not reliably accepted by every
    # scikit-learn release, so normalise it before the call.
    if feature_names is not None:
        feature_names = list(feature_names)
    r = export_text(clf, feature_names=feature_names)
    print(r)
|
||||
|
||||
if __name__ == '__main__':
    # read_excel needs the label column name in addition to the workbook path.
    X, y, feature_names = read_excel("tests/Beispiel Auswertung2.xlsx", "Motornummer")
    # Encode the columns as ordinal codes, then fill the remaining gaps with
    # SimpleImputer's default strategy.
    # NOTE(review): without a ColumnTransformer the encoder also runs on the
    # float "Messpunkt" columns — confirm that this is intended.
    pipe = make_pipeline(OrdinalEncoder(), SimpleImputer())
    X = pipe.fit_transform(X)
    print(X)
    classify(X, y, feature_names)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue