Transform data and fit decision tree

This commit is contained in:
Laborratte 5 2025-06-06 14:44:18 +02:00
parent e232b49424
commit 5f9f4a6c7f
Signed by: Laborratte5
GPG key ID: 3A30072E35202C02

View file

@ -1,8 +1,42 @@
import numpy as np
import pandas
from sklearn.impute import SimpleImputer
from sklearn.pipeline import make_pipeline
from sklearn import tree
from sklearn.preprocessing import OrdinalEncoder
from sklearn.tree import export_text
# Placeholder string that read_excel() replaces with NaN in the "Messpunkt"
# columns before casting them to float64.
NO_DATA: str = "NO DATA"
def read_excel(
    path: str, target_column: str
) -> tuple[pandas.DataFrame, pandas.Series, pandas.Index]:
    """Load a spreadsheet and split it into typed features and a target.

    Feature columns whose label starts with "Messpunkt" are treated as
    numeric: the ``NO_DATA`` placeholder is replaced by NaN and the column is
    cast to float64. Every other feature column is cast to pandas' string
    dtype.

    Args:
        path: Location of the Excel workbook, passed to ``pandas.read_excel``.
        target_column: Label of the column to predict. It is removed from the
            features and returned separately.

    Returns:
        ``(X, y, feature_names)``: the feature frame, the target column, and
        the labels of the feature columns.

    Raises:
        KeyError: If ``target_column`` is not a column of the workbook.
        ValueError: If a "Messpunkt" column contains a value other than
            ``NO_DATA`` that cannot be converted to float.
    """
    data = pandas.read_excel(path)
    print(data)

    X = data.drop(target_column, axis=1)
    y = data[target_column]
    # The target is already excluded here, so the loop below needs no
    # per-column check against target_column.
    feature_names = data.columns.drop(target_column)

    for feat in feature_names:
        # Column labels are not guaranteed to be strings (numeric header
        # cells may be read as numbers), so normalise before the prefix test.
        if str(feat).startswith("Messpunkt"):
            # Numerical feature: map the placeholder to NaN, then cast.
            X[feat] = X[feat].replace(to_replace=NO_DATA, value=np.nan)
            X[feat] = X[feat].astype('float64')
        else:
            # Categorical feature: keep the values as text.
            X[feat] = X[feat].astype('string')

    return X, y, feature_names
def classify(X, y, feature_names=None):
    """Fit a decision tree on ``X`` / ``y`` and print its rules as text.

    Args:
        X: Feature matrix handed to ``DecisionTreeClassifier.fit``.
        y: Target values handed to ``DecisionTreeClassifier.fit``.
        feature_names: Optional labels for the columns of ``X`` used in the
            printed rules. Any iterable is accepted (list, pandas Index,
            ndarray); when ``None``, ``export_text`` chooses the names.

    Returns:
        None. The textual tree is written to stdout.
    """
    clf = tree.DecisionTreeClassifier()
    clf.fit(X, y)

    if feature_names is not None:
        # Some scikit-learn releases test the truthiness of feature_names
        # inside export_text, which raises for a pandas Index or ndarray.
        # A plain list of str avoids that.
        feature_names = [str(name) for name in feature_names]

    print(export_text(clf, feature_names=feature_names))
if __name__ == '__main__':
    # Load the example workbook; "Motornummer" is the column to predict.
    X, y, feature_names = read_excel(
        "tests/Beispiel Auswertung2.xlsx", "Motornummer"
    )

    # Encode the features, then fill the remaining NaN values.
    # NOTE(review): OrdinalEncoder is applied to every column here, including
    # the float "Messpunkt" columns prepared by read_excel(). Confirm that is
    # intended rather than encoding only the string columns.
    pipe = make_pipeline(OrdinalEncoder(), SimpleImputer())
    X = pipe.fit_transform(X)
    print(X)

    classify(X, y, feature_names)