diff --git a/src/cluster_test/cluster.py b/src/cluster_test/cluster.py
index 1973f47..f431ccf 100644
--- a/src/cluster_test/cluster.py
+++ b/src/cluster_test/cluster.py
@@ -1,3 +1,4 @@
+import argparse
 import numpy as np
 import pandas
 from sklearn.impute import SimpleImputer
@@ -32,8 +33,18 @@ def classify(X, data_frame):
     return data_frame[y_pred < 0]
 
 if __name__ == '__main__':
-    data_frame = read_excel("tests/Beispiel Auswertung2.xlsx")
-    X = pandas.get_dummies(data_frame) # OneHotEncode categorical values
+    parser = argparse.ArgumentParser()
+    parser.add_argument("file")
+    parser.add_argument(
+        "--data-column",
+        default="Messpunkt",
+        help="Used to identify the columns containing measurements. The heading of each column containing data should contain the specified value."
+    )
+    args = parser.parse_args()
+
+    data_frame = read_excel(args.file)
+    X = data_frame.filter(like=args.data_column, axis='columns')
+    X = pandas.get_dummies(X) # OneHotEncode categorical values
     pipe = make_pipeline(SimpleImputer(add_indicator=True))
     X = pipe.fit_transform(X)
     outlier = classify(X, data_frame)