# ------------------------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License (MIT). See LICENSE in the repo root for license information.
# ------------------------------------------------------------------------------------------
# From:
# https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/ml-frameworks/scikit-learn/train-hyperparameter-tune-deploy-with-sklearn/train_iris.py
import argparse
from pathlib import Path
from azureml.core import ScriptRunConfig
from azureml.train.hyperdrive import HyperDriveConfig, PrimaryMetricGoal, choice
from azureml.train.hyperdrive.sampling import RandomParameterSampling
import numpy as np
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split
from health.azure.himl import submit_to_azure_if_needed
def main() -> None:
param_sampling = RandomParameterSampling({
"--kernel": choice('linear', 'rbf', 'poly', 'sigmoid'),
"--penalty": choice(0.5, 1, 1.5)
})
hyperdrive_config = HyperDriveConfig(
run_config=ScriptRunConfig(source_directory=""),
hyperparameter_sampling=param_sampling,
primary_metric_name='Accuracy',
primary_metric_goal=PrimaryMetricGoal.MAXIMIZE,
max_total_runs=12,
max_concurrent_runs=4)
run_info = submit_to_azure_if_needed(
compute_cluster_name="lite-testing-ds2",
default_datastore="himldatasets",
input_datasets=["himl_sample7_input"],
wait_for_completion=True,
wait_for_completion_show_output=True,
hyperdrive_config=hyperdrive_config)
run = run_info.run
parser = argparse.ArgumentParser()
parser.add_argument('--kernel', type=str, default='linear',
help='Kernel type to be used in the algorithm')
parser.add_argument('--penalty', type=float, default=1.0,
help='Penalty parameter of the error term')
args = parser.parse_args()
run.log('Kernel type', np.str(args.kernel))
run.log('Penalty', np.float(args.penalty))
# X -> features, y -> label
input_folder = run_info.input_datasets[0] or Path("inputs")
X = np.loadtxt(fname=input_folder / "X.csv", delimiter=',', skiprows=1)
y = np.loadtxt(fname=input_folder / "y.csv", dtype='str', delimiter=',', skiprows=1)
# dividing X, y into train and test data
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
# training a linear SVM classifier
from sklearn.svm import SVC
svm_model_linear = SVC(kernel=args.kernel, C=args.penalty).fit(X_train, y_train)
svm_predictions = svm_model_linear.predict(X_test)
# model accuracy for X_test
accuracy = svm_model_linear.score(X_test, y_test)
print('Accuracy of SVM classifier on test set: {:.2f}'.format(accuracy))
run.log('Accuracy', np.float(accuracy))
# creating a confusion matrix
cm = confusion_matrix(y_test, svm_predictions)
print(cm)
if __name__ == "__main__":
main()