Mercurial > repos > bgruening > sklearn_ensemble
comparison ensemble.xml @ 19:128bd4ab2b82 draft
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit f54ff2ba2f8e7542d68966ce5a6b17d7f624ac48
| author | bgruening |
|---|---|
| date | Fri, 13 Jul 2018 03:50:49 -0400 |
| parents | 0070e491573f |
| children | 090cb13556f1 |
comparison
equal
deleted
inserted
replaced
| 18:0070e491573f | 19:128bd4ab2b82 |
|---|---|
| 25 @COLUMNS_FUNCTION@ | 25 @COLUMNS_FUNCTION@ |
| 26 @GET_X_y_FUNCTION@ | 26 @GET_X_y_FUNCTION@ |
| 27 | 27 |
| 28 # Get inputs, outputs. | 28 # Get inputs, outputs. |
| 29 input_json_path = sys.argv[1] | 29 input_json_path = sys.argv[1] |
| 30 params = json.load(open(input_json_path, "r")) | 30 with open(input_json_path, "r") as param_handler: |
| 31 print params | 31 params = json.load(param_handler) |
| | 32 print(params) |
| 32 | 33 |
| 33 # Put all cheetah up here to avoid confusion. | 34 # Put all cheetah up here to avoid confusion. |
| 34 #if $selected_tasks.selected_task == "train": | 35 #if $selected_tasks.selected_task == "train": |
| 35 infile1 = "$selected_tasks.selected_algorithms.input_options.infile1" | 36 infile1 = "$selected_tasks.selected_algorithms.input_options.infile1" |
| 36 infile2 = "$selected_tasks.selected_algorithms.input_options.infile2" | 37 infile2 = "$selected_tasks.selected_algorithms.input_options.infile2" |
| 61 options["min_samples_leaf"] = 1 | 62 options["min_samples_leaf"] = 1 |
| 62 if "min_samples_split" in options and options["min_samples_split"] > 1.0: | 63 if "min_samples_split" in options and options["min_samples_split"] > 1.0: |
| 63 options["min_samples_split"] = int(options["min_samples_split"]) | 64 options["min_samples_split"] = int(options["min_samples_split"]) |
| 64 | 65 |
| 65 X, y = get_X_y(params, infile1, infile2) | 66 X, y = get_X_y(params, infile1, infile2) |
| 66 | 67 |
| 67 my_class = getattr(sklearn.ensemble, algorithm) | 68 my_class = getattr(sklearn.ensemble, algorithm) |
| 68 estimator = my_class(**options) | 69 estimator = my_class(**options) |
| 69 estimator.fit(X,y) | 70 estimator.fit(X,y) |
| 70 pickle.dump(estimator,open(outfile_fit, 'w+'), pickle.HIGHEST_PROTOCOL) | 71 with open(outfile_fit, 'wb') as out_handler: |
| | 72 pickle.dump(estimator, out_handler, pickle.HIGHEST_PROTOCOL) |
| 71 | 73 |
| 72 else: | 74 else: |
| 73 classifier_object = pickle.load(open(infile_model, 'r')) | 75 with open(infile_model, 'rb') as model_handler: |
| | 76 classifier_object = pickle.load(model_handler) |
| 74 header = 'infer' if params["selected_tasks"]["header"] else None | 77 header = 'infer' if params["selected_tasks"]["header"] else None |
| 75 data = pandas.read_csv(infile_data, sep='\t', header=header, index_col=None, parse_dates=True, encoding=None, tupleize_cols=False) | 78 data = pandas.read_csv(infile_data, sep='\t', header=header, index_col=None, parse_dates=True, encoding=None, tupleize_cols=False) |
| 76 prediction = classifier_object.predict(data) | 79 prediction = classifier_object.predict(data) |
| 77 prediction_df = pandas.DataFrame(prediction, columns=["predicted"]) | 80 prediction_df = pandas.DataFrame(prediction, columns=["predicted"]) |
| 78 res = pandas.concat([data, prediction_df], axis=1) | 81 res = pandas.concat([data, prediction_df], axis=1) |
