From 5f9f4a6c7fbe2ed1b6d216f4701e08d781b3a9c0 Mon Sep 17 00:00:00 2001
From: Laborratte5
Date: Fri, 6 Jun 2025 14:44:18 +0200
Subject: [PATCH] Transform data and fit decisionTree

---
 src/cluster_test/cluster.py | 51 ++++++++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 48 insertions(+), 3 deletions(-)

diff --git a/src/cluster_test/cluster.py b/src/cluster_test/cluster.py
index 055f141..a74263b 100644
--- a/src/cluster_test/cluster.py
+++ b/src/cluster_test/cluster.py
@@ -1,8 +1,53 @@
+import numpy as np
 import pandas
+from sklearn.impute import SimpleImputer
+from sklearn.pipeline import make_pipeline
+from sklearn import tree
+from sklearn.preprocessing import OrdinalEncoder
+from sklearn.tree import export_text
 
-def read_excel(path: str):
+NO_DATA: str = "NO DATA"
+
+def read_excel(path: str, target_column: str):
+    """Read an Excel sheet and split it into features and a target column.
+
+    Columns whose name starts with "Messpunkt" are converted to float64,
+    with NO_DATA cells replaced by NaN; every other feature column is cast
+    to the pandas string dtype.
+
+    Returns the tuple (X, y, feature_names).
+    """
     data = pandas.read_excel(path)
-    print(data)
+    X = data.drop(target_column, axis=1)
+    y = data[target_column]
+    feature_names = data.columns.drop(target_column)
+
+    # target_column was dropped above, so every name here is a feature.
+    for feat in feature_names:
+        if feat.startswith("Messpunkt"):
+            # Convert to numerical value
+            X[feat] = X[feat].replace(to_replace=NO_DATA, value=np.nan)
+            X[feat] = X[feat].astype('float64')
+        else:
+            # Convert to categorical value
+            X[feat] = X[feat].astype('string')
+
+    return X, y, feature_names
+
+def classify(X, y, feature_names=None):
+    """Fit a decision tree on (X, y) and print its rules as text."""
+    clf = tree.DecisionTreeClassifier()
+    clf = clf.fit(X, y)
+    # Hand export_text a plain list: older scikit-learn releases test the
+    # argument's truth value, which raises for a pandas Index.
+    if feature_names is not None:
+        feature_names = list(feature_names)
+    r = export_text(clf, feature_names=feature_names)
+    print(r)
 
 if __name__ == '__main__':
-    read_excel("tests/Beispiel Auswertung.xlsx")
\ No newline at end of file
+    X, y, feature_names = read_excel("tests/Beispiel Auswertung2.xlsx", "Motornummer")
+    pipe = make_pipeline(OrdinalEncoder(), SimpleImputer())
+    X = pipe.fit_transform(X)
+    print(X)
+    classify(X, y, feature_names)