Laboration part 3

Preparations:

Be sure that you have done the preparations. You need to make several new recordings with the accelerometer and rate gyro as active sensors. You also need to record at a rate of at least 100 Hz.

Q1: Which features seem most useful? Motivate your answer. Insert your code below, and the answer below the code.

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import scipy.stats as stats
from sklearn.metrics import accuracy_score, ConfusionMatrixDisplay
import warnings
warnings.filterwarnings("ignore")

A first classification

Running

In [2]:
run_1_accelerometer = pd.read_csv("running_1/Accelerometer.csv", usecols=[2,3,4],
                            names=['ax', 'ay', 'az'], header=0)
run_1_accelerometer_cut = run_1_accelerometer[100:1000]
# run_1_accelerometer.plot();
# run_1_accelerometer_cut.plot();
In [3]:
run_1_gyroscope = pd.read_csv("running_1/Gyroscope.csv", usecols=[2,3,4],
                            names=['gx', 'gy', 'gz'], header=0)

run_1_gyroscope_cut = run_1_gyroscope[100:1000]
# run_1_gyroscope.plot();
# run_1_gyroscope_cut.plot();
In [4]:
run_2_accelerometer = pd.read_csv("running_2/Accelerometer.csv", usecols=[2,3,4],
                            names=['ax', 'ay', 'az'], header=0)
run_2_accelerometer_cut = run_2_accelerometer[100:1000]
# run_2_accelerometer.plot();
# run_2_accelerometer_cut.plot();
In [5]:
run_2_gyroscope = pd.read_csv("running_2/Gyroscope.csv", usecols=[2,3,4],
                            names=['gx', 'gy', 'gz'], header=0)
run_2_gyroscope_cut = run_2_gyroscope[100:1000]
# run_2_gyroscope.plot();
# run_2_gyroscope_cut.plot();
In [6]:
run_3_accelerometer = pd.read_csv("running_3/Accelerometer.csv", usecols=[2,3,4],
                            names=['ax', 'ay', 'az'], header=0)
run_3_accelerometer_cut = run_3_accelerometer[100:1000]
# run_3_accelerometer.plot();
# run_3_accelerometer_cut.plot();
In [7]:
run_3_gyroscope = pd.read_csv("running_3/Gyroscope.csv", usecols=[2,3,4],
                            names=['gx', 'gy', 'gz'], header=0)
run_3_gyroscope_cut = run_3_gyroscope[100:1000]
# run_3_gyroscope.plot();
# run_3_gyroscope_cut.plot();
In [8]:
run_4_accelerometer = pd.read_csv("running_4/Accelerometer.csv", usecols=[2,3,4],
                            names=['ax', 'ay', 'az'], header=0)
run_4_accelerometer_cut = run_4_accelerometer[100:1000]
# run_4_accelerometer.plot();
# run_4_accelerometer_cut.plot();
In [9]:
run_4_gyroscope = pd.read_csv("running_4/Gyroscope.csv", usecols=[2,3,4],
                            names=['gx', 'gy', 'gz'], header=0)
run_4_gyroscope_cut = run_4_gyroscope[100:1000]
# run_4_gyroscope.plot();
# run_4_gyroscope_cut.plot();
In [10]:
run_5_accelerometer = pd.read_csv("running_5/Accelerometer.csv", usecols=[2,3,4],
                            names=['ax', 'ay', 'az'], header=0)
run_5_accelerometer_cut = run_5_accelerometer[100:1000]
# run_5_accelerometer.plot();
# run_5_accelerometer_cut.plot();
In [11]:
run_5_gyroscope = pd.read_csv("running_5/Gyroscope.csv", usecols=[2,3,4],
                            names=['gx', 'gy', 'gz'], header=0)
run_5_gyroscope_cut = run_5_gyroscope[100:1000]
# run_5_gyroscope.plot();
# run_5_gyroscope_cut.plot();
In [12]:
run_6_accelerometer = pd.read_csv("running_6/Accelerometer.csv", usecols=[2,3,4],
                            names=['ax', 'ay', 'az'], header=0)
run_6_accelerometer_cut = run_6_accelerometer[100:1000]
# run_6_accelerometer.plot();
# run_6_accelerometer_cut.plot();
In [13]:
run_6_gyroscope = pd.read_csv("running_6/Gyroscope.csv", usecols=[2,3,4],
                            names=['gx', 'gy', 'gz'], header=0)
run_6_gyroscope_cut = run_6_gyroscope[100:1000]
# run_6_gyroscope.plot();
# run_6_gyroscope_cut.plot();

Walking

In [14]:
walk_1_accelerometer = pd.read_csv("walking_1/Accelerometer.csv", usecols=[2,3,4],
                            names=['ax', 'ay', 'az'], header=0)
walk_1_accelerometer_cut = walk_1_accelerometer[100:1000]
# walk_1_accelerometer.plot();
# walk_1_accelerometer_cut.plot()
In [15]:
walk_1_gyroscope = pd.read_csv("walking_1/Gyroscope.csv", usecols=[2,3,4],
                            names=['gx', 'gy', 'gz'], header=0)
walk_1_gyroscope_cut = walk_1_gyroscope[100:1000]
# walk_1_gyroscope.plot();
# walk_1_gyroscope_cut.plot();
In [16]:
walk_2_accelerometer = pd.read_csv("walking_2/Accelerometer.csv", usecols=[2,3,4],
                            names=['ax', 'ay', 'az'], header=0)
walk_2_accelerometer_cut = walk_2_accelerometer[100:1000]
# walk_2_accelerometer.plot();
# walk_2_accelerometer_cut.plot();
In [17]:
walk_2_gyroscope = pd.read_csv("walking_2/Gyroscope.csv", usecols=[2,3,4],
                            names=['gx', 'gy', 'gz'], header=0)
walk_2_gyroscope_cut = walk_2_gyroscope[100:1000]
# walk_2_gyroscope.plot();
# walk_2_gyroscope_cut.plot();
In [18]:
walk_3_accelerometer = pd.read_csv("walking_3/Accelerometer.csv", usecols=[2,3,4],
                            names=['ax', 'ay', 'az'], header=0)
walk_3_accelerometer_cut = walk_3_accelerometer[100:1000]
# walk_3_accelerometer.plot();
# walk_3_accelerometer_cut.plot()
In [19]:
walk_3_gyroscope = pd.read_csv("walking_3/Gyroscope.csv", usecols=[2,3,4],
                            names=['gx', 'gy', 'gz'], header=0)
walk_3_gyroscope_cut = walk_3_gyroscope[100:1000]
# walk_3_gyroscope.plot();
# walk_3_gyroscope_cut.plot();
In [20]:
walk_4_accelerometer = pd.read_csv("walking_4/Accelerometer.csv", usecols=[2,3,4],
                            names=['ax', 'ay', 'az'], header=0)
walk_4_accelerometer_cut = walk_4_accelerometer[100:1000]
# walk_4_accelerometer.plot();
# walk_4_accelerometer_cut.plot();
In [21]:
walk_4_gyroscope = pd.read_csv("walking_4/Gyroscope.csv", usecols=[2,3,4],
                            names=['gx', 'gy', 'gz'], header=0)
walk_4_gyroscope_cut = walk_4_gyroscope[100:1000]
# walk_4_gyroscope.plot();
# walk_4_gyroscope_cut.plot();
In [22]:
walk_5_accelerometer = pd.read_csv("walking_5/Accelerometer.csv", usecols=[2,3,4],
                            names=['ax', 'ay', 'az'], header=0)
walk_5_accelerometer_cut = walk_5_accelerometer[100:1000]
# walk_5_accelerometer.plot();
# walk_5_accelerometer_cut.plot();
In [23]:
walk_5_gyroscope = pd.read_csv("walking_5/Gyroscope.csv", usecols=[2,3,4],
                            names=['gx', 'gy', 'gz'], header=0)
walk_5_gyroscope_cut = walk_5_gyroscope[100:1000]
# walk_5_gyroscope.plot();
# walk_5_gyroscope_cut.plot();
In [24]:
walk_6_accelerometer = pd.read_csv("walking_6/Accelerometer.csv", usecols=[2,3,4],
                            names=['ax', 'ay', 'az'], header=0)
walk_6_accelerometer_cut = walk_6_accelerometer[100:1000]
# walk_6_accelerometer.plot();
# walk_6_accelerometer_cut.plot();
In [25]:
walk_6_gyroscope = pd.read_csv("walking_6/Gyroscope.csv", usecols=[2,3,4],
                            names=['gx', 'gy', 'gz'], header=0)
walk_6_gyroscope_cut = walk_6_gyroscope[100:1000]
# walk_6_gyroscope.plot();
# walk_6_gyroscope_cut.plot();

Jumping

In [26]:
jump_1_accelerometer = pd.read_csv("jumping_1/Accelerometer.csv", usecols=[2,3,4],
                            names=['ax', 'ay', 'az'], header=0)
jump_1_accelerometer_cut = jump_1_accelerometer[100:1000]
# jump_1_accelerometer.plot();
# jump_1_accelerometer_cut.plot();
In [27]:
jump_1_gyroscope = pd.read_csv("jumping_1/Gyroscope.csv", usecols=[2,3,4],
                            names=['gx', 'gy', 'gz'], header=0)
jump_1_gyroscope_cut = jump_1_gyroscope[100:1000]
# jump_1_gyroscope.plot();
# jump_1_gyroscope_cut.plot();
In [28]:
jump_2_accelerometer = pd.read_csv("jumping_2/Accelerometer.csv", usecols=[2,3,4],
                            names=['ax', 'ay', 'az'], header=0)
jump_2_accelerometer_cut = jump_2_accelerometer[100:1000]
# jump_2_accelerometer.plot();
# jump_2_accelerometer_cut.plot();
In [29]:
jump_2_gyroscope = pd.read_csv("jumping_2/Gyroscope.csv", usecols=[2,3,4],
                            names=['gx', 'gy', 'gz'], header=0)
jump_2_gyroscope_cut = jump_2_gyroscope[100:1000]
# jump_2_gyroscope.plot();
# jump_2_gyroscope_cut.plot();
In [30]:
jump_3_accelerometer = pd.read_csv("jumping_3/Accelerometer.csv", usecols=[2,3,4],
                            names=['ax', 'ay', 'az'], header=0)
jump_3_accelerometer_cut = jump_3_accelerometer[100:1000]
# jump_3_accelerometer.plot();
# jump_3_accelerometer_cut.plot();
In [31]:
jump_3_gyroscope = pd.read_csv("jumping_3/Gyroscope.csv", usecols=[2,3,4],
                            names=['gx', 'gy', 'gz'], header=0)
jump_3_gyroscope_cut = jump_3_gyroscope[100:1000]
# jump_3_gyroscope.plot();
# jump_3_gyroscope_cut.plot();
In [32]:
jump_4_accelerometer = pd.read_csv("jumping_4/Accelerometer.csv", usecols=[2,3,4],
                            names=['ax', 'ay', 'az'], header=0)
jump_4_accelerometer_cut = jump_4_accelerometer[100:1000]
# jump_4_accelerometer.plot();
# jump_4_accelerometer_cut.plot();
In [33]:
jump_4_gyroscope = pd.read_csv("jumping_4/Gyroscope.csv", usecols=[2,3,4],
                            names=['gx', 'gy', 'gz'], header=0)
jump_4_gyroscope_cut = jump_4_gyroscope[100:1000]
# jump_4_gyroscope.plot();
# jump_4_gyroscope_cut.plot();
In [34]:
jump_5_accelerometer = pd.read_csv("jumping_5/Accelerometer.csv", usecols=[2,3,4],
                            names=['ax', 'ay', 'az'], header=0)
jump_5_accelerometer_cut = jump_5_accelerometer[100:1000]
# jump_5_accelerometer.plot();
# jump_5_accelerometer_cut.plot();
In [35]:
jump_5_gyroscope = pd.read_csv("jumping_5/Gyroscope.csv", usecols=[2,3,4],
                            names=['gx', 'gy', 'gz'], header=0)
jump_5_gyroscope_cut = jump_5_gyroscope[100:1000]
# jump_5_gyroscope.plot();
# jump_5_gyroscope_cut.plot();
In [36]:
jump_6_accelerometer = pd.read_csv("jumping_6/Accelerometer.csv", usecols=[2,3,4],
                            names=['ax', 'ay', 'az'], header=0)
jump_6_accelerometer_cut = jump_6_accelerometer[100:1000]
# jump_6_accelerometer.plot();
# jump_6_accelerometer_cut.plot();
In [37]:
jump_6_gyroscope = pd.read_csv("jumping_6/Gyroscope.csv", usecols=[2,3,4],
                            names=['gx', 'gy', 'gz'], header=0)
jump_6_gyroscope_cut = jump_6_gyroscope[100:1000]
# jump_6_gyroscope.plot();
# jump_6_gyroscope_cut.plot();

bend_knee

In [38]:
bend_knee_1_accelerometer = pd.read_csv("bend_knee_1/Accelerometer.csv", usecols=[2,3,4],
                            names=['ax', 'ay', 'az'], header=0)
bend_knee_1_accelerometer_cut = bend_knee_1_accelerometer[100:1000]
# bend_knee_1_accelerometer.plot();
# bend_knee_1_accelerometer_cut.plot();
In [39]:
bend_knee_1_gyroscope = pd.read_csv("bend_knee_1/Gyroscope.csv", usecols=[2,3,4],
                            names=['gx', 'gy', 'gz'], header=0)
bend_knee_1_gyroscope_cut = bend_knee_1_gyroscope[100:1000]
# bend_knee_1_gyroscope.plot();
# bend_knee_1_gyroscope_cut.plot();
In [40]:
bend_knee_2_accelerometer = pd.read_csv("bend_knee_2/Accelerometer.csv", usecols=[2,3,4],
                            names=['ax', 'ay', 'az'], header=0)
bend_knee_2_accelerometer_cut = bend_knee_2_accelerometer[100:1000]
# bend_knee_2_accelerometer.plot();
# bend_knee_2_accelerometer_cut.plot();
In [41]:
bend_knee_2_gyroscope = pd.read_csv("bend_knee_2/Gyroscope.csv", usecols=[2,3,4],
                            names=['gx', 'gy', 'gz'], header=0)
bend_knee_2_gyroscope_cut = bend_knee_2_gyroscope[100:1000]
# bend_knee_2_gyroscope.plot();
# bend_knee_2_gyroscope_cut.plot();
In [42]:
bend_knee_3_accelerometer = pd.read_csv("bend_knee_3/Accelerometer.csv", usecols=[2,3,4],
                            names=['ax', 'ay', 'az'], header=0)
bend_knee_3_accelerometer_cut = bend_knee_3_accelerometer[100:1000]
# bend_knee_3_accelerometer.plot();
# bend_knee_3_accelerometer_cut.plot();
In [43]:
bend_knee_3_gyroscope = pd.read_csv("bend_knee_3/Gyroscope.csv", usecols=[2,3,4],
                            names=['gx', 'gy', 'gz'], header=0)
bend_knee_3_gyroscope_cut = bend_knee_3_gyroscope[100:1000]
# bend_knee_3_gyroscope.plot();
# bend_knee_3_gyroscope_cut.plot();
In [44]:
bend_knee_4_accelerometer = pd.read_csv("bend_knee_4/Accelerometer.csv", usecols=[2,3,4],
                            names=['ax', 'ay', 'az'], header=0)
bend_knee_4_accelerometer_cut = bend_knee_4_accelerometer[100:1000]
# bend_knee_4_accelerometer.plot();
# bend_knee_4_accelerometer_cut.plot();
In [45]:
bend_knee_4_gyroscope = pd.read_csv("bend_knee_4/Gyroscope.csv", usecols=[2,3,4],
                            names=['gx', 'gy', 'gz'], header=0)
bend_knee_4_gyroscope_cut = bend_knee_4_gyroscope[100:1000]
# bend_knee_4_gyroscope.plot();
# bend_knee_4_gyroscope_cut.plot();
In [46]:
bend_knee_5_accelerometer = pd.read_csv("bend_knee_5/Accelerometer.csv", usecols=[2,3,4],
                            names=['ax', 'ay', 'az'], header=0)
bend_knee_5_accelerometer_cut = bend_knee_5_accelerometer[100:1000]
# bend_knee_5_accelerometer.plot();
# bend_knee_5_accelerometer_cut.plot();
In [47]:
bend_knee_5_gyroscope = pd.read_csv("bend_knee_5/Gyroscope.csv", usecols=[2,3,4],
                            names=['gx', 'gy', 'gz'], header=0)
bend_knee_5_gyroscope_cut = bend_knee_5_gyroscope[100:1000]
# bend_knee_5_gyroscope.plot();
# bend_knee_5_gyroscope_cut.plot();
In [48]:
bend_knee_6_accelerometer = pd.read_csv("bend_knee_6/Accelerometer.csv", usecols=[2,3,4],
                            names=['ax', 'ay', 'az'], header=0)
bend_knee_6_accelerometer_cut = bend_knee_6_accelerometer[100:1000]
# bend_knee_6_accelerometer.plot();
# bend_knee_6_accelerometer_cut.plot();
In [49]:
bend_knee_6_gyroscope = pd.read_csv("bend_knee_6/Gyroscope.csv", usecols=[2,3,4],
                            names=['gx', 'gy', 'gz'], header=0)
bend_knee_6_gyroscope_cut = bend_knee_6_gyroscope[100:1000]
# bend_knee_6_gyroscope.plot();
# bend_knee_6_gyroscope_cut.plot();

All movements together

In [50]:
all_in_one_1_accelerometer = pd.read_csv("all_in_one_1/Accelerometer.csv", usecols=[2,3,4],
                            names=['ax', 'ay', 'az'], header=0)
all_in_one_1_accelerometer_cut = all_in_one_1_accelerometer[100:1000]
# all_in_one_1_accelerometer.plot();
# all_in_one_1_accelerometer_cut.plot();
In [51]:
all_in_one_1_gyroscope = pd.read_csv("all_in_one_1/Gyroscope.csv", usecols=[2,3,4],
                            names=['gx', 'gy', 'gz'], header=0)
all_in_one_1_gyroscope_cut = all_in_one_1_gyroscope[100:1000]
# all_in_one_1_gyroscope.plot();
# all_in_one_1_gyroscope_cut.plot();
In [52]:
all_in_one_2_accelerometer = pd.read_csv("all_in_one_2/Accelerometer.csv", usecols=[2,3,4],
                            names=['ax', 'ay', 'az'], header=0)
all_in_one_2_accelerometer_cut = all_in_one_2_accelerometer[100:1000]
# all_in_one_2_accelerometer.plot();
# all_in_one_2_accelerometer_cut.plot();
In [53]:
all_in_one_2_gyroscope = pd.read_csv("all_in_one_2/Gyroscope.csv", usecols=[2,3,4],
                            names=['gx', 'gy', 'gz'], header=0)
all_in_one_2_gyroscope_cut = all_in_one_2_gyroscope[100:1000]
# all_in_one_2_gyroscope.plot();
# all_in_one_2_gyroscope_cut.plot();
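
The loading cells above all follow the same pattern, so they could be collapsed into a small helper. A minimal sketch, assuming the same `<activity>_<n>/Sensor.csv` folder layout and column order used above:

In [ ]:
def load_cut(folder, sensor, cols, start=100, stop=1000):
    # Read columns 2-4 of one sensor CSV and keep samples start..stop,
    # mirroring the per-recording cells above.
    return pd.read_csv(f"{folder}/{sensor}.csv", usecols=[2, 3, 4],
                       names=cols, header=0)[start:stop]

# e.g. run_1_accelerometer_cut = load_cut("running_1", "Accelerometer", ['ax', 'ay', 'az'])
#      run_1_gyroscope_cut     = load_cut("running_1", "Gyroscope", ['gx', 'gy', 'gz'])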
In [54]:
def read_all_files(accelerometer, gyroscope, clas):
    # Despite the name, this no longer reads files: it joins an already-loaded
    # accelerometer cut and gyroscope cut side by side, drops rows where one
    # sensor has no sample, and attaches the class label.
    return pd.concat([accelerometer, gyroscope], axis=1).dropna().assign(class_=clas)

def training_data():
    run_train_1 = read_all_files(run_1_accelerometer_cut, run_1_gyroscope_cut, "running")
    run_train_2 = read_all_files(run_2_accelerometer_cut, run_2_gyroscope_cut, "running")
    run_train_3 = read_all_files(run_3_accelerometer_cut, run_3_gyroscope_cut, "running")
    run_train_4 = read_all_files(run_4_accelerometer_cut, run_4_gyroscope_cut, "running")
    run_train_5 = read_all_files(run_5_accelerometer_cut, run_5_gyroscope_cut, "running")

    walk_train_1 = read_all_files(walk_1_accelerometer_cut, walk_1_gyroscope_cut, "walking")
    walk_train_2 = read_all_files(walk_2_accelerometer_cut, walk_2_gyroscope_cut, "walking")
    walk_train_3 = read_all_files(walk_3_accelerometer_cut, walk_3_gyroscope_cut, "walking")
    walk_train_4 = read_all_files(walk_4_accelerometer_cut, walk_4_gyroscope_cut, "walking")
    walk_train_5 = read_all_files(walk_5_accelerometer_cut, walk_5_gyroscope_cut, "walking")

    jump_train_1 = read_all_files(jump_1_accelerometer_cut, jump_1_gyroscope_cut, "jumping")
    jump_train_2 = read_all_files(jump_2_accelerometer_cut, jump_2_gyroscope_cut, "jumping")
    jump_train_3 = read_all_files(jump_3_accelerometer_cut, jump_3_gyroscope_cut, "jumping")
    jump_train_4 = read_all_files(jump_4_accelerometer_cut, jump_4_gyroscope_cut, "jumping")
    jump_train_5 = read_all_files(jump_5_accelerometer_cut, jump_5_gyroscope_cut, "jumping")

    bend_knee_train_1 = read_all_files(bend_knee_1_accelerometer_cut, bend_knee_1_gyroscope_cut, "bend_knee")
    bend_knee_train_2 = read_all_files(bend_knee_2_accelerometer_cut, bend_knee_2_gyroscope_cut, "bend_knee")
    bend_knee_train_3 = read_all_files(bend_knee_3_accelerometer_cut, bend_knee_3_gyroscope_cut, "bend_knee")
    bend_knee_train_4 = read_all_files(bend_knee_4_accelerometer_cut, bend_knee_4_gyroscope_cut, "bend_knee")
    bend_knee_train_5 = read_all_files(bend_knee_5_accelerometer_cut, bend_knee_5_gyroscope_cut, "bend_knee")

    

    return [run_train_1, run_train_2, run_train_3, run_train_4, run_train_5, 
            walk_train_1, walk_train_2, walk_train_3, walk_train_4, walk_train_5,
           jump_train_1, jump_train_2, jump_train_3, jump_train_4, jump_train_5,
           bend_knee_train_1, bend_knee_train_2, bend_knee_train_3, bend_knee_train_4, bend_knee_train_5] 

def testing_data():
    run_test = read_all_files(run_6_accelerometer_cut, run_6_gyroscope_cut, "running")
    
    walk_test = read_all_files(walk_6_accelerometer_cut, walk_6_gyroscope_cut, "walking")

    jump_test = read_all_files(jump_6_accelerometer_cut, jump_6_gyroscope_cut, "jumping")

    bend_knee_test = read_all_files(bend_knee_6_accelerometer_cut, bend_knee_6_gyroscope_cut, "bend_knee")

    return [run_test, walk_test, jump_test, bend_knee_test]

plotted_graphs

In [55]:
# training_data() plot
pd.concat(training_data(), ignore_index=True).plot();
In [56]:
# testing_data() plot
pd.concat(testing_data(), ignore_index=True).plot();
In [57]:
sns.pairplot(pd.concat(training_data(), ignore_index=True), hue="class_", height =2.5);
In [58]:
sns.pairplot(pd.concat(testing_data(), ignore_index=True), hue="class_", height =2.5);

store_data_as_csv

In [59]:
training_data_store = pd.concat(training_data(), ignore_index=True)
training_data_store.to_csv('cut_training_data_file/cut_training_data.csv', index=False)
In [60]:
testing_data_store = pd.concat(testing_data(), ignore_index=True)
testing_data_store.to_csv('cut_testing_data_file/cut_testing_data.csv', index=False)
In [61]:
training_data_store
Out[61]:
ax ay az gx gy gz class_
0 3.536667 -8.764157 8.219295 -0.426663 0.376293 0.468237 running
1 3.972804 -12.131095 11.188315 -0.133465 -0.126467 0.777247 running
2 3.418738 -14.802985 12.352308 0.195966 -0.441429 0.986390 running
3 1.820551 -16.844654 8.020269 0.370743 -0.536392 1.180035 running
4 0.771038 -16.254522 3.320959 -0.066549 -0.279340 1.371567 running
... ... ... ... ... ... ... ...
17820 -4.225421 -7.347326 2.834419 1.315053 0.989951 0.788295 bend_knee
17821 -3.888998 -6.893213 2.058209 1.380154 1.288926 0.862489 bend_knee
17822 -4.683547 -6.589226 1.510789 1.444906 1.497039 0.967244 bend_knee
17823 -5.238657 -6.579218 0.651558 1.428796 1.569016 1.079469 bend_knee
17824 -5.052723 -6.786039 -0.916491 1.325403 1.399684 1.295959 bend_knee

17825 rows × 7 columns

In [62]:
testing_data_store
Out[62]:
ax ay az gx gy gz class_
0 -1.303614 -6.411490 4.077556 -0.421829 -0.733195 -0.274802 running
1 -0.998066 -6.549138 4.557804 -0.496127 -0.755675 -0.248011 running
2 -0.687327 -6.553696 4.994870 -0.511329 -0.774472 -0.243334 running
3 -0.329969 -6.566999 6.440067 -0.487907 -0.773408 -0.255150 running
4 -0.195836 -6.753056 6.938913 -0.338978 -0.745797 -0.273982 running
... ... ... ... ... ... ... ...
3595 -9.171450 -3.374264 8.320090 0.772116 0.337809 0.808978 bend_knee
3596 -10.878668 -3.192045 8.108212 0.496721 0.239599 0.469790 bend_knee
3597 -10.963327 -3.435191 8.807771 0.264854 -0.078453 -0.017017 bend_knee
3598 -9.955962 -3.697164 9.846040 0.071157 -0.217381 -0.255795 bend_knee
3599 -9.054172 -3.656607 12.560192 -0.259478 -0.084963 -0.418268 bend_knee

3600 rows × 7 columns

Answer 1: ax, ay, gx and gy seem most useful. These features measure acceleration and angular rate along the x and y axes, and since I only moved in the forward direction during running, walking and jumping, most of the discriminating motion shows up in those directions.
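
One quantitative way to check this (a sketch, not part of the lab template): compare the spread of the per-class means of each feature against its overall spread. Features whose class means lie far apart relative to the overall standard deviation should separate the classes better.

In [ ]:
# Spread of the class means relative to the overall spread, per feature.
# Larger values suggest a more useful feature for classification.
class_means = training_data_store.groupby('class_').mean()
separation = class_means.std() / training_data_store.drop(columns='class_').std()
print(separation.sort_values(ascending=False))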

Q2: Explain the results in your confusion matrix. Use one classifier of your own choice.

In [63]:
testing_data_store.isnull().values.any()
Out[63]:
False
In [64]:
training_data_store.isnull().values.any()
Out[64]:
False
In [65]:
from sklearn.model_selection import train_test_split
X = training_data_store.drop(['class_'], axis='columns')
y = training_data_store['class_']

print(X.shape)
print(y.shape)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=1)

print(len(X_train))

print(len(X_test))

print(X_train.shape)
print(y_train.shape)
(17825, 6)
(17825,)
14260
3565
(14260, 6)
(14260,)

Model fitting with k-fold cross-validation and GridSearchCV

In [66]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import GridSearchCV

for weights in ["uniform", "distance"]:
    knn = KNeighborsClassifier(weights=weights)

    k_range = list(range(1, 100))
    param_grid = dict(n_neighbors=k_range)

    # defining parameter range
    grid = GridSearchCV(knn, param_grid, cv=10, scoring='accuracy', return_train_score=False,verbose=1)

    # fitting the model for grid search
    grid_search=grid.fit(X_train, y_train)

    print(f"{grid_search.best_params_}")

    accuracy = grid_search.best_score_ *100
    print(f"Accuracy [{weights}] for our training dataset with tuning is {accuracy}\n\n" )
    
Fitting 10 folds for each of 99 candidates, totalling 990 fits
{'n_neighbors': 1}
Accuracy [uniform] for our training dataset with tuning is 87.0687237026648


Fitting 10 folds for each of 99 candidates, totalling 990 fits
{'n_neighbors': 4}
Accuracy [distance] for our training dataset with tuning is 87.19495091164096


In [67]:
#grid['mean_score_test']
#means = grid.cv_results_['mean_test_score']
#plt.plot(k_range, means)
grid.best_score_
Out[67]:
0.8719495091164096
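
The commented-out lines above hint at plotting the cross-validation curve; a minimal sketch, using the `grid` object left over from the last ('distance') loop iteration:

In [ ]:
# Mean CV accuracy for each candidate k from the grid search above.
means = grid.cv_results_['mean_test_score']
plt.plot(k_range, means)
plt.xlabel('n_neighbors')
plt.ylabel('mean CV accuracy');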

Create KNN (k-nearest neighbours classifier)

In [68]:
from sklearn.neighbors import KNeighborsClassifier
knn = KNeighborsClassifier(n_neighbors=4, weights='distance')
In [69]:
knn.fit(X_train, y_train)
Out[69]:
KNeighborsClassifier(n_neighbors=4, weights='distance')
In [70]:
knn.score(X_test, y_test)
Out[70]:
0.8788218793828892
In [71]:
# -9.955962	-3.697164	9.846040	0.071157	-0.217381	-0.255795	bend_knee
knn.predict([[-9.955962, -3.697164, 9.846040, 0.071157, -0.217381, -0.255795]])
Out[71]:
array(['bend_knee'], dtype=object)
In [72]:
from sklearn.metrics import confusion_matrix
y_pred = knn.predict(X_test)
cm = confusion_matrix(y_test, y_pred)
print(cm)
ConfusionMatrixDisplay.from_estimator(knn, X_test, y_test);
[[850  21  17  52]
 [ 46 704  74  55]
 [ 20  33 779  59]
 [ 21  14  20 800]]
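To make the matrix easier to read, each row can be normalised so it shows per-class recall; a sketch based on the `cm` computed above:

In [ ]:
# Row-normalised confusion matrix: each row sums to 1 (per-class recall).
cm_norm = cm / cm.sum(axis=1, keepdims=True)
print(np.round(cm_norm, 2))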
In [73]:
X_train
Out[73]:
ax ay az gx gy gz
1966 1.570903 1.767323 5.805029 -0.934955 -2.183075 -1.920682
6584 3.373976 -9.811429 -0.365259 -0.370149 0.377253 -0.718849
11434 -0.410357 2.805525 -1.407302 -1.180279 0.371930 0.524698
13665 -4.804913 -5.455974 1.120869 -2.360837 0.023318 -0.450382
801 -6.805039 -3.854358 -3.486446 -0.235427 0.857009 0.504941
... ... ... ... ... ... ...
10955 0.445532 -31.626226 17.553684 1.036185 2.632986 0.304403
17289 -11.245843 -3.027372 9.695894 -1.803309 0.618824 -0.661218
5192 -7.766551 -14.789760 -1.387919 3.597769 -0.839905 2.393161
12172 8.263492 -11.748184 -20.384186 1.199669 10.835231 -0.605874
235 -2.914155 -1.697288 5.323143 -0.783164 0.406871 -0.246528

14260 rows × 6 columns

In [74]:
from sklearn.preprocessing import StandardScaler

# Creating StandardScaler Object
scaler = StandardScaler()
# print(scaler.fit(data))

# X_train = X_train.drop('class_', axis=1)

X_train = scaler.fit_transform(X_train)

X_test = scaler.transform(X_test)

X_train = pd.DataFrame(X_train)

X_test = pd.DataFrame(X_test)

print(len(X_train))
print(len(X_test))

# Creating Classifier Model
model=KNeighborsClassifier(n_neighbors=4, weights='distance')
model.fit(X_train,y_train)

# Accuracy on the held-out test split of the training data
score=model.score(X_test,y_test)
print("Accuracy for our training dataset using Standard Scaler is : {:.3f}%".format(score*100) )
14260
3565
Accuracy for our training dataset using Standard Scaler is : 92.482%

The StandardScaler result above refers to the training dataset; see Q4.
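
To avoid fitting the scaler on the test split, the scaler and classifier can also be combined in a scikit-learn Pipeline, so that the scaler is fit only on the training part of each fit. A sketch with the same hyperparameters as above (it assumes the unscaled X_train/X_test from the original split):

In [ ]:
from sklearn.pipeline import make_pipeline

# Scaler + KNN in one estimator: the scaler's fit happens only on the training data.
pipe = make_pipeline(StandardScaler(),
                     KNeighborsClassifier(n_neighbors=4, weights='distance'))
pipe.fit(X_train, y_train)
print(pipe.score(X_test, y_test))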

testing_data

In [75]:
from sklearn.model_selection import train_test_split
X = testing_data_store.drop(['class_'], axis='columns')
y = testing_data_store['class_']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=1)

print(len(X_train))

print(len(X_test))
2880
720

Model fitting with k-fold cross-validation and GridSearchCV

In [76]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import GridSearchCV

for weights in ["uniform", "distance"]:
    knn = KNeighborsClassifier(weights=weights)

    k_range = list(range(1, 100))
    param_grid = dict(n_neighbors=k_range)

    # defining parameter range
    grid = GridSearchCV(knn, param_grid, cv=10, scoring='accuracy', return_train_score=False,verbose=1)

    # fitting the model for grid search
    grid_search=grid.fit(X_train, y_train)

    print(f"{grid_search.best_params_}")

    accuracy = grid_search.best_score_ *100
    print(f"Accuracy [{weights}] for our testing dataset with tuning is {accuracy}\n\n" )
    
Fitting 10 folds for each of 99 candidates, totalling 990 fits
{'n_neighbors': 1}
Accuracy [uniform] for our testing dataset with tuning is 87.60416666666669


Fitting 10 folds for each of 99 candidates, totalling 990 fits
{'n_neighbors': 1}
Accuracy [distance] for our testing dataset with tuning is 87.60416666666669


Create KNN (k-nearest neighbours classifier)

In [77]:
from sklearn.neighbors import KNeighborsClassifier
knn = KNeighborsClassifier(n_neighbors=1, weights='distance')
In [78]:
knn.fit(X_train, y_train)
Out[78]:
KNeighborsClassifier(n_neighbors=1, weights='distance')
In [79]:
knn.score(X_test, y_test)
Out[79]:
0.8958333333333334
In [80]:
# -0.329969	-6.566999	6.440067	-0.487907	-0.773408	-0.255150	running

knn.predict([[-0.329969, -6.566999, 6.440067, -0.487907, -0.773408, -0.255150]])
Out[80]:
array(['running'], dtype=object)
In [81]:
from sklearn.metrics import confusion_matrix
y_pred = knn.predict(X_test)
cm = confusion_matrix(y_test, y_pred)
print(cm)
ConfusionMatrixDisplay.from_estimator(knn, X_test, y_test);
[[166   4   2   2]
 [ 12 154   9   4]
 [  5   4 163  14]
 [  5   2  12 162]]
In [82]:
from sklearn.preprocessing import StandardScaler

# Creating StandardScaler Object
scaler = StandardScaler()
# print(scaler.fit(data))

# X_train = X_train.drop('class_', axis=1)

X_train = scaler.fit_transform(X_train)

X_test = scaler.transform(X_test)

X_train = pd.DataFrame(X_train)

X_test = pd.DataFrame(X_test)

print(len(X_train))
print(len(X_test))

# Creating Classifier Model
model=KNeighborsClassifier(n_neighbors=1, weights='distance')
model.fit(X_train,y_train)

# Accuracy on the held-out test split of the testing data
score=model.score(X_test,y_test)
print("Accuracy for our testing dataset using Standard Scaler is : {:.3f}%".format(score*100) )
2880
720
Accuracy for our testing dataset using Standard Scaler is : 93.889%

The StandardScaler result above refers to the testing dataset; see Q4.

Answer 2:

I chose the KNN classifier. In the confusion matrices most samples fall on the diagonal, i.e. most windows are classified correctly, but the matrices are not perfect: in the training matrix, jumping is the weakest class and is confused mainly with running and walking. I therefore think I need a StandardScaler to achieve higher accuracy, so this answer continues in Answer 4.

Preprocess_the_data

Q3: Calculate the standard statistical values for each recording and each feature. Use (.describe()) for example. Explain the results.

In [83]:
Total_data_used_statistical_values_training_data_store_describe = training_data_store.describe()
Total_data_used_statistical_values_training_data_store_describe 
Out[83]:
ax ay az gx gy gz
count 17825.000000 17825.000000 17825.000000 17825.000000 17825.000000 17825.000000
mean -2.986353 -8.950275 2.074440 -0.031268 0.095956 0.043261
std 4.853279 7.784934 8.915541 1.941447 2.518209 1.561716
min -58.355330 -73.522530 -65.680550 -9.467172 -24.216618 -5.364095
25% -6.024376 -12.290586 -1.960957 -1.291544 -1.093938 -0.876417
50% -3.116935 -7.741853 1.317041 -0.201044 0.033458 -0.076044
75% 0.355663 -3.917728 5.650498 1.384884 1.021890 0.817460
max 34.771310 19.047120 77.439890 15.596314 16.606546 10.029377
In [84]:
sns.pairplot(pd.concat(training_data(), ignore_index=True), hue="class_", height =2.5);
In [85]:
Total_data_used_statistical_values_testing_data_store_describe  = testing_data_store.describe()
Total_data_used_statistical_values_testing_data_store_describe 
Out[85]:
ax ay az gx gy gz
count 3600.000000 3600.000000 3600.000000 3600.000000 3600.000000 3600.000000
mean -3.146436 -8.909527 2.227296 -0.012130 0.109259 0.060217
std 4.588190 7.544367 8.876328 1.966366 2.119607 1.326979
min -22.241808 -72.455290 -54.965710 -8.880236 -20.100677 -4.693068
25% -5.784240 -11.836035 -2.239646 -1.277027 -1.039195 -0.803118
50% -3.168965 -7.640913 1.244525 -0.272053 0.058032 0.021660
75% -0.084896 -4.115906 5.800447 1.431523 1.058581 0.862476
max 17.217682 16.545313 75.672134 12.783716 10.861376 5.727054
In [86]:
sns.pairplot(pd.concat(testing_data(), ignore_index=True), hue="class_", height =2.5);

Answer 3:

In my training_data(), the count for ax, ay, az, gx, gy, gz is 17825; in my testing_data() it is 3600. For both datasets, the mean of az is the most positive. az also has the largest standard deviation, i.e. it varies the most, even though I mostly moved in the x and y directions. ay has the most negative min, and az has the most positive max.
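
The per-class statistics computed further below for Q6 could also be produced in a single call; a sketch:

In [ ]:
# One describe() table per class, stacked: count/mean/std/min/quartiles/max
# for every feature within each class.
training_data_store.groupby('class_').describe()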

Q4: Decide if you need to scale the data or not. Which scaler do you choose? What do you expect from the choice? Better or worse result? Motivate your answer.

Answer 4: The results are shown at the lines "The StandardScaler result above refers to the training dataset" and "...the testing dataset". I think scaling the data is needed, since my KNN scores increased after applying the StandardScaler.

Q5: Transform the acceleration to magnitude and angle. Keep this as a separate dataframe, but include the rate gyro sensor values in this one as well, for future evaluation, i.e. classification.
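
The transformation used in the cell below is the vector magnitude together with the angle to one axis (the z-axis for the accelerometer, the y-axis for the rate gyro):

$$\lVert a \rVert = \sqrt{a_x^2 + a_y^2 + a_z^2}, \qquad \theta_a = \arccos\!\left(\frac{a_z}{\lVert a \rVert}\right)$$

and analogously $\lVert g \rVert$ with $\theta_g = \arccos(g_y / \lVert g \rVert)$.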

In [87]:
# acceleration

d = pd.concat(training_data(), ignore_index=True)

# magnitude of the acceleration vector
a = np.sqrt((d['ax'] ** 2) + (d['ay'] ** 2) + (d['az'] ** 2))

norm_acceleration = d.assign(m_norm_acceleration=a)

# angle between the acceleration vector and the z-axis
theta_acceleration = d.assign(my_theta_acceleration=np.arccos(d['az'] / norm_acceleration['m_norm_acceleration']))

class_ = d['class_']

# rategyro
# magnitude of the angular-rate vector
a = np.sqrt((d['gx'] ** 2) + (d['gy'] ** 2) + (d['gz'] ** 2))

norm_rategyro = d.assign(m_norm_rategyro=a)

# angle between the angular-rate vector and the y-axis
theta_rategyro = d.assign(my_theta_rategyro=np.arccos(d['gy'] / norm_rategyro['m_norm_rategyro']))

df = pd.DataFrame({'class_': class_,
                   'norm_acceleration': norm_acceleration['m_norm_acceleration'],
                   'theta_acceleration': theta_acceleration['my_theta_acceleration'],
                   'norm_rategyro': norm_rategyro['m_norm_rategyro'],
                   'theta_rategyro': theta_rategyro['my_theta_rategyro']})
df
# df = pd.concat([df], axis=1).dropna().assign(class_=class_)
Out[87]:
class_ norm_acceleration theta_acceleration norm_rategyro theta_rategyro
0 running 12.524986 0.854982 0.736806 1.034788
1 running 16.974246 0.851129 0.798699 1.729806
2 running 19.580492 0.888151 1.098284 1.984415
3 running 18.745172 1.128675 1.348202 1.979977
4 running 16.608213 1.369480 1.401305 1.771483
... ... ... ... ... ...
17820 bend_knee 8.937076 1.248071 1.825042 0.997474
17821 bend_knee 8.177831 1.316379 2.076064 0.900969
17822 bend_knee 8.224111 1.386045 2.294436 0.859964
17823 bend_knee 8.435293 1.493477 2.380866 0.851294
17824 bend_knee 8.510011 1.678701 2.322783 0.924055

17825 rows × 5 columns

Answer 5: The answer is shown above.

Q6: Investigate if you can use some statistical measure for each class for classification purposes.

Q7: Examine the frequency at which the pattern of your movements repeats itself for the different movements. Could this be helpful for the classification?

training_data

In [88]:
training_data_store
Out[88]:
ax ay az gx gy gz class_
0 3.536667 -8.764157 8.219295 -0.426663 0.376293 0.468237 running
1 3.972804 -12.131095 11.188315 -0.133465 -0.126467 0.777247 running
2 3.418738 -14.802985 12.352308 0.195966 -0.441429 0.986390 running
3 1.820551 -16.844654 8.020269 0.370743 -0.536392 1.180035 running
4 0.771038 -16.254522 3.320959 -0.066549 -0.279340 1.371567 running
... ... ... ... ... ... ... ...
17820 -4.225421 -7.347326 2.834419 1.315053 0.989951 0.788295 bend_knee
17821 -3.888998 -6.893213 2.058209 1.380154 1.288926 0.862489 bend_knee
17822 -4.683547 -6.589226 1.510789 1.444906 1.497039 0.967244 bend_knee
17823 -5.238657 -6.579218 0.651558 1.428796 1.569016 1.079469 bend_knee
17824 -5.052723 -6.786039 -0.916491 1.325403 1.399684 1.295959 bend_knee

17825 rows × 7 columns

In [89]:
run = training_data_store['class_'].str.contains('running')
if (run.any()):
    print(training_data_store[run])
            ax         ay         az        gx        gy        gz   class_
0     3.536667  -8.764157   8.219295 -0.426663  0.376293  0.468237  running
1     3.972804 -12.131095  11.188315 -0.133465 -0.126467  0.777247  running
2     3.418738 -14.802985  12.352308  0.195966 -0.441429  0.986390  running
3     1.820551 -16.844654   8.020269  0.370743 -0.536392  1.180035  running
4     0.771038 -16.254522   3.320959 -0.066549 -0.279340  1.371567  running
...        ...        ...        ...       ...       ...       ...      ...
4495 -6.606886  -4.424983  -4.107464 -0.418879  1.336381  0.219632  running
4496 -7.275557  -6.001708  -5.381811 -0.432911  1.277232  0.625369  running
4497 -7.173545  -6.372599  -6.186866 -0.282813  0.786166  1.166578  running
4498 -7.410753  -8.059927  -7.771310 -0.056985 -0.008063  1.433666  running
4499 -7.690779 -12.510910  -1.063169  1.048035  0.516007  2.052018  running

[4500 rows x 7 columns]
In [90]:
run_statistical_values_training_data_store_describe = training_data_store[run].describe()
run_statistical_values_training_data_store_describe
Out[90]:
ax ay az gx gy gz
count 4500.000000 4500.000000 4500.000000 4500.000000 4500.000000 4500.000000
mean -3.155088 -10.370162 0.210956 -0.033843 0.277159 0.078657
std 4.865273 8.646869 7.915198 1.673507 3.341670 2.241965
min -19.415750 -37.804590 -25.676895 -4.784052 -8.252998 -4.036597
25% -6.932648 -15.377623 -3.924741 -1.100696 -1.797942 -1.491318
50% -3.904732 -9.199481 -0.329322 -0.296401 -0.293163 -0.582800
75% 0.901872 -3.546445 4.436671 1.062046 1.967501 1.272764
max 16.223417 4.778022 69.153336 5.208708 12.455402 9.648895
In [91]:
run_list = [training_data_store[run]]

sns.pairplot(pd.concat(run_list, ignore_index=True), height =2.5);
In [92]:
jump = training_data_store['class_'].str.contains('jumping')
if (jump.any()):
    print(training_data_store[jump])
              ax         ay         az        gx        gy        gz   class_
8825    3.241912 -13.474079   7.663859  0.448358 -0.223612 -0.345523  jumping
8826    3.226168 -11.907581   6.437711  0.616293 -0.367828 -0.525466  jumping
8827    2.664996 -10.304280   4.496675  0.660625 -0.486336 -0.569012  jumping
8828    2.882513  -8.580618   2.771164  0.488605 -0.823586 -0.571246  jumping
8829    3.687789  -8.448391   2.221647 -0.277926 -0.764507 -0.695077  jumping
...          ...        ...        ...       ...       ...       ...      ...
13320  -4.133810 -14.072485 -12.496738  2.834519  2.962801 -1.359437  jumping
13321  -7.947897  -6.630090 -11.768182  2.187142  1.589594 -0.944904  jumping
13322  -9.582275  -2.473383  -2.501960  1.497248  0.307335 -0.489233  jumping
13323 -10.397558  -3.243473   5.110979  0.465863 -0.070232 -0.020804  jumping
13324  -9.043609  -4.898709   5.657096 -1.078474  0.561385  0.207205  jumping

[4500 rows x 7 columns]
In [93]:
jump_statistical_values_training_data_store_describe = training_data_store[jump].describe()
jump_statistical_values_training_data_store_describe
Out[93]:
ax ay az gx gy gz
count 4500.000000 4500.000000 4500.000000 4500.000000 4500.000000 4500.000000
mean -0.715596 -9.828769 2.870194 -0.007783 0.069305 -0.019031
std 5.679784 11.496961 14.182450 2.507901 3.248618 1.460084
min -58.355330 -73.522530 -65.680550 -9.467172 -24.216618 -5.364095
25% -3.226496 -18.099513 -3.593546 -1.587028 -1.422661 -0.747045
50% 0.061258 -6.441858 0.757861 0.103542 0.116780 0.011868
75% 2.376928 -0.340767 7.461750 1.564736 1.563222 0.737327
max 34.771310 19.047120 77.439890 15.596314 16.606546 10.029377
In [94]:
jump_list = [training_data_store[jump]]

sns.pairplot(pd.concat(jump_list, ignore_index=True), height =2.5);
In [95]:
walk = training_data_store['class_'].str.contains('walking')
if (walk.any()):
    print(training_data_store[walk])
            ax        ay        az        gx        gy        gz   class_
4500 -2.006896 -7.116554  4.699732  0.041452 -0.587792  0.198252  walking
4501 -1.830023 -6.753287  4.192621 -0.004119 -0.716754  0.176837  walking
4502 -1.560215 -6.247737  3.953517 -0.093619 -0.899281  0.125611  walking
4503 -1.409974 -6.082172  4.246691 -0.150919 -0.966371  0.082659  walking
4504 -1.212864 -6.145609  5.386715 -0.207450 -0.927834  0.021939  walking
...        ...       ...       ...       ...       ...       ...      ...
8820  2.219875 -2.280391 -5.598362 -0.786829  1.433788  0.173172  walking
8821  2.369082 -1.991162 -5.703477 -1.031978  0.561385 -0.354948  walking
8822  2.184450 -2.232325 -5.271151 -1.168830  0.010245 -0.711536  walking
8823  0.314665 -5.304507 -4.289749 -1.261542 -0.609382 -1.066256  walking
8824 -1.617743 -8.312736 -3.499221 -1.268436 -0.888058 -1.139299  walking

[4325 rows x 7 columns]
In [96]:
walk_statistical_values_training_data_store_describe = training_data_store[walk].describe()
walk_statistical_values_training_data_store_describe
Out[96]:
ax ay az gx gy gz
count 4325.000000 4325.000000 4325.000000 4325.000000 4325.000000 4325.000000
mean -3.069930 -9.333466 0.603748 -0.072456 0.000396 0.078736
std 3.914733 3.331278 3.941401 1.116711 1.371160 0.772815
min -18.453377 -24.694840 -13.449025 -2.630403 -5.044455 -2.700984
25% -5.089412 -11.313857 -1.673116 -0.836676 -0.907466 -0.486580
50% -3.239116 -9.008615 -0.078214 -0.282831 0.069290 -0.074351
75% -0.617560 -7.067923 2.580815 0.446839 0.800635 0.556044
max 10.113854 1.083788 33.589880 3.980537 4.611177 3.627894
In [97]:
walk_list = [training_data_store[walk]]

sns.pairplot(pd.concat(walk_list, ignore_index=True), height =2.5);
In [98]:
bend_knee = training_data_store['class_'].str.contains('bend_knee')
if (bend_knee.any()):
    print(training_data_store[bend_knee])
             ax        ay        az        gx        gy        gz     class_
13325 -4.387365 -6.716186 -0.015658 -1.090517  0.808576 -2.368394  bend_knee
13326 -2.745575 -5.280220  0.215947 -1.099680  0.861111 -2.340731  bend_knee
13327 -1.124988 -4.220719 -0.793802 -1.132387  0.795015 -2.374311  bend_knee
13328  2.272768 -3.300302 -0.082188 -1.190419  0.673819 -2.649776  bend_knee
13329  3.303864 -4.243952 -0.700687 -1.022431  0.564439 -3.205908  bend_knee
...         ...       ...       ...       ...       ...       ...        ...
17820 -4.225421 -7.347326  2.834419  1.315053  0.989951  0.788295  bend_knee
17821 -3.888998 -6.893213  2.058209  1.380154  1.288926  0.862489  bend_knee
17822 -4.683547 -6.589226  1.510789  1.444906  1.497039  0.967244  bend_knee
17823 -5.238657 -6.579218  0.651558  1.428796  1.569016  1.079469  bend_knee
17824 -5.052723 -6.786039 -0.916491  1.325403  1.399684  1.295959  bend_knee

[4500 rows x 7 columns]
In [99]:
bend_knee_statistical_values_training_data_store_describe = training_data_store[bend_knee].describe()
bend_knee_statistical_values_training_data_store_describe
Out[99]:
ax ay az gx gy gz
count 4500.000000 4500.000000 4500.000000 4500.000000 4500.000000 4500.000000
mean -5.008048 -6.283606 4.555668 -0.012592 0.033249 0.036062
std 3.657334 3.528068 4.882842 2.154415 1.244822 1.387055
min -18.992897 -26.365800 -18.561280 -3.821800 -5.858616 -4.259668
25% -7.096464 -8.348569 1.554194 -2.084477 -0.637879 -1.048973
50% -4.747654 -5.709247 4.018442 -0.122766 0.085556 0.080582
75% -2.456784 -3.797111 7.929246 2.004642 0.599124 0.978384
max 5.217502 7.628606 28.570288 5.096135 9.463210 4.653065
In [100]:
bend_knee_list = [training_data_store[bend_knee]]

sns.pairplot(pd.concat(bend_knee_list, ignore_index=True), height =2.5);

testing_data

In [101]:
run = testing_data_store['class_'].str.contains('running')
if (run.any()):
    print(testing_data_store[run])
           ax        ay        az        gx        gy        gz   class_
0   -1.303614 -6.411490  4.077556 -0.421829 -0.733195 -0.274802  running
1   -0.998066 -6.549138  4.557804 -0.496127 -0.755675 -0.248011  running
2   -0.687327 -6.553696  4.994870 -0.511329 -0.774472 -0.243334  running
3   -0.329969 -6.566999  6.440067 -0.487907 -0.773408 -0.255150  running
4   -0.195836 -6.753056  6.938913 -0.338978 -0.745797 -0.273982  running
..        ...       ...       ...       ...       ...       ...      ...
895 -1.142676 -7.028859 -0.988777 -0.586012 -1.856193  0.250106  running
896 -1.131136 -7.112148 -1.290695 -0.652561 -1.006078  0.268362  running
897 -0.377307 -7.173612 -1.347782 -0.657099 -0.327581  0.327912  running
898  0.466563 -6.896718 -1.469705 -0.629261 -0.020630  0.342940  running
899  1.515338 -6.184529 -1.835979 -0.574021  0.106744  0.319099  running

[900 rows x 7 columns]
In [102]:
run_statistical_values_testing_data_store_describe = testing_data_store[run].describe()
run_statistical_values_testing_data_store_describe
Out[102]:
ax ay az gx gy gz
count 900.000000 900.000000 900.000000 900.000000 900.000000 900.000000
mean -3.654827 -9.961343 0.087934 -0.011950 0.219210 0.086860
std 4.559622 6.618733 6.154609 1.299093 2.271544 1.603129
min -16.515050 -34.368008 -16.662685 -2.389984 -5.328996 -2.934509
25% -7.165489 -13.280773 -3.640242 -0.945310 -1.437340 -1.192771
50% -3.646460 -9.376551 -0.404673 -0.421733 -0.168704 -0.148091
75% -0.051231 -5.318389 3.578123 0.925064 1.441585 1.008888
max 8.170855 1.344201 51.412820 3.732369 8.195647 5.386732
In [103]:
run_list = [testing_data_store[run]]

sns.pairplot(pd.concat(run_list, ignore_index=True), height =2.5);
In [104]:
jump = testing_data_store['class_'].str.contains('jumping')
if (jump.any()):
    print(testing_data_store[jump])
            ax        ay         az        gx        gy        gz   class_
1800 -0.087044 -5.947197   0.115391  0.017401  0.993337  1.039029  jumping
1801 -3.963151 -5.011601  -0.823739 -0.028711  0.860831  1.331791  jumping
1802 -1.872601 -4.339836  -1.011885 -0.237976  0.032481  1.774040  jumping
1803  0.497735 -2.944035   2.831833 -0.426942 -0.241519  1.772679  jumping
1804 -0.522606 -2.340553   4.859693 -0.637359  0.036041  1.633907  jumping
...        ...       ...        ...       ...       ...       ...      ...
2695 -7.338276 -9.363675  19.157799  1.549992  5.654972  1.061370  jumping
2696 -3.477166 -6.216439  12.904528  2.177909  6.611516  1.524720  jumping
2697 -1.564142 -4.503015   9.139798  2.528912  6.211801  1.693475  jumping
2698 -0.148220 -3.000577   5.078973  3.001844  4.924796  1.915062  jumping
2699  0.918569 -1.990214   2.149169  3.224478  3.867231  2.036049  jumping

[900 rows x 7 columns]
In [105]:
jump_statistical_values_testing_data_store_describe = testing_data_store[jump].describe()
jump_statistical_values_testing_data_store_describe
Out[105]:
ax ay az gx gy gz
count 900.000000 900.000000 900.000000 900.000000 900.000000 900.000000
mean -0.118211 -10.076424 3.211690 -0.027394 0.227343 -0.037803
std 4.676360 12.195051 14.422964 2.664203 3.032247 1.476088
min -22.241808 -72.455290 -54.965710 -8.880236 -20.100677 -4.693068
25% -2.868771 -18.104933 -3.596412 -2.111949 -1.435983 -1.012736
50% 0.277612 -5.582153 1.155107 0.005236 0.118630 0.271704
75% 2.275090 -0.743720 7.574031 1.675451 1.705309 0.889394
max 17.217682 16.545313 75.672134 12.783716 10.861376 5.727054
In [106]:
jump_list = [testing_data_store[jump]]

sns.pairplot(pd.concat(jump_list, ignore_index=True), height =2.5);
In [107]:
walk = testing_data_store['class_'].str.contains('walking')
if (walk.any()):
    print(testing_data_store[walk])
            ax         ay        az        gx        gy        gz   class_
900  -1.251526  -7.742667  6.135784  0.062518  0.024609  0.179402  walking
901  -1.362013  -7.610392  6.003968  0.128037  0.198933  0.235218  walking
902  -1.516372  -7.467812  5.857481  0.146189  0.468010  0.300825  walking
903  -1.752326  -7.158730  5.429475  0.087895  0.671707  0.332712  walking
904  -1.915112  -6.941700  4.959340  0.005288  0.715742  0.328907  walking
...        ...        ...       ...       ...       ...       ...      ...
1795 -7.246837  -7.239175 -1.813177 -1.654450 -2.499696  0.044262  walking
1796 -6.397882  -7.754753 -1.809940 -1.834323 -2.472625  0.086446  walking
1797 -5.976033  -8.979453 -1.606672 -2.002189 -2.421958  0.079343  walking
1798 -5.786172  -9.977942 -1.058812 -2.072334 -2.397297  0.071279  walking
1799 -5.205358 -11.112898  0.078137 -2.086925 -2.370140  0.061820  walking

[900 rows x 7 columns]
In [108]:
walk_statistical_values_testing_data_store_describe = testing_data_store[walk].describe()
walk_statistical_values_testing_data_store_describe
Out[108]:
ax ay az gx gy gz
count 900.000000 900.000000 900.000000 900.000000 900.000000 900.000000
mean -4.602216 -9.198835 0.104735 -0.107253 -0.025419 0.113245
std 4.267751 3.828462 4.989880 1.278311 1.623782 0.832404
min -21.761130 -22.758114 -13.299868 -3.335586 -5.616225 -1.212149
25% -6.763136 -11.204503 -2.415948 -0.961659 -1.058224 -0.550900
50% -4.378665 -8.943023 -0.729532 -0.333410 0.164401 -0.116667
75% -2.072562 -6.191834 1.551503 0.642700 1.051609 0.576757
max 4.543543 -0.317241 35.974224 3.923971 4.877096 2.659533
In [109]:
walk_list = [testing_data_store[walk]]

sns.pairplot(pd.concat(walk_list, ignore_index=True), height =2.5);
In [110]:
bend_knee = testing_data_store['class_'].str.contains('bend_knee')
if (bend_knee.any()):
    print(testing_data_store[bend_knee])
             ax        ay         az        gx        gy        gz     class_
2700  -1.225285 -6.566021   8.820689  2.282384  2.715191  0.595367  bend_knee
2701  -0.180054 -4.831317   8.226717  2.427700  2.934492  0.580252  bend_knee
2702   1.262855 -2.240925   4.933741  2.670563  2.550415  0.415580  bend_knee
2703   1.593897 -0.712763  -2.102032  2.518527  1.848199  0.211202  bend_knee
2704   1.437402 -0.495849  -6.156948  2.119825  1.633576  0.114947  bend_knee
...         ...       ...        ...       ...       ...       ...        ...
3595  -9.171450 -3.374264   8.320090  0.772116  0.337809  0.808978  bend_knee
3596 -10.878668 -3.192045   8.108212  0.496721  0.239599  0.469790  bend_knee
3597 -10.963327 -3.435191   8.807771  0.264854 -0.078453 -0.017017  bend_knee
3598  -9.955962 -3.697164   9.846040  0.071157 -0.217381 -0.255795  bend_knee
3599  -9.054172 -3.656607  12.560192 -0.259478 -0.084963 -0.418268  bend_knee

[900 rows x 7 columns]
In [111]:
bend_knee_statistical_values_testing_data_store_describe = testing_data_store[bend_knee].describe()
bend_knee_statistical_values_testing_data_store_describe
Out[111]:
ax ay az gx gy gz
count 900.000000 900.000000 900.000000 900.000000 900.000000 900.000000
mean -4.210491 -6.401505 5.504825 0.098077 0.015901 0.078566
std 3.272579 3.438623 4.878855 2.244579 0.970507 1.262589
min -16.586464 -20.463634 -10.618936 -3.989159 -2.868885 -2.793888
25% -5.946213 -8.598337 2.395557 -2.130388 -0.493330 -0.896427
50% -3.780871 -6.028432 4.934819 0.230733 0.043738 0.126196
75% -1.946259 -3.965957 8.894357 2.199346 0.479150 0.929353
max 3.793411 5.208566 18.733679 5.356031 4.152627 3.286909
In [112]:
bend_knee_list = [testing_data_store[bend_knee]]

sns.pairplot(pd.concat(bend_knee_list, ignore_index=True), height =2.5);

I answered questions 6 and 7 together, because the cells that plot the pairplots have to come directly after the corresponding (.describe()) cells.

Answer 6:

Each class can be summarised with statistical measures. I separated each class out of the training and testing datasets and then used the (.describe()) method to show the statistics.

Training data: in the run class the count is 4500; the mean of ay is the most negative, ay varies the most (largest std), ay has the most negative min, and az has the most positive max.

In the jump class the count is 4500; the mean of ay is the most negative, az varies the most, ay has the most negative min, and az has the most positive max.

In the walk class the count is 4325; the mean of ay is the most negative, ax and az vary the most (similar std), ay has the most negative min, and az has the most positive max.

In the bend_knee class the count is 4500; the mean of ay is the most negative, az varies the most, ay has the most negative min, and az has the most positive max.

Testing data: in the run class the count is 900; the mean of ay is the most negative, ay varies the most, ay has the most negative min, and az has the most positive max.

In the jump class the count is 900; the mean of ay is the most negative, az varies the most, ay has the most negative min, and az has the most positive max.

In the walk class the count is 900; the mean of ay is the most negative, az varies the most, ay has the most negative min, and az has the most positive max.

In the bend_knee class the count is 900; the mean of ay is the most negative, az varies the most, ay has the most negative min, and az has the most positive max.

Answer 7: I plotted a pairplot for each class from the training and testing datasets. I think this helps to observe how the pattern differs between the classes, which could be useful for classification; a sketch of estimating the repetition frequency directly follows below.
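
One way to examine the frequency at which a movement repeats is to look at the dominant FFT peak of one axis per class. A sketch, assuming a sampling rate of about 100 Hz as required in the preparations; each per-class signal here is a concatenation of five recordings, so this is only a rough estimate:

In [ ]:
def dominant_frequency(signal, fs=100.0):
    # Estimate the dominant repetition frequency (Hz) of a 1-D signal via FFT.
    x = np.asarray(signal, dtype=float)
    x = x - x.mean()                          # remove the DC component
    spectrum = np.abs(np.fft.rfft(x))
    freqs = np.fft.rfftfreq(len(x), d=1.0 / fs)
    return freqs[spectrum[1:].argmax() + 1]   # skip the zero-frequency bin

for label in ['running', 'walking', 'jumping', 'bend_knee']:
    mask = training_data_store['class_'] == label
    print(label, dominant_frequency(training_data_store.loc[mask, 'ay']))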

ax, ay, az, gx, gy, gz describe_training_data

In [113]:
ax_describe_training_data = pd.DataFrame({ "Total": Total_data_used_statistical_values_training_data_store_describe['ax'],
               'walk': walk_statistical_values_training_data_store_describe['ax'],
               'run': run_statistical_values_training_data_store_describe['ax'],
               'jump': jump_statistical_values_training_data_store_describe['ax'],
               'bend_knee': bend_knee_statistical_values_training_data_store_describe['ax']

})
ax_describe_training_data
Out[113]:
Total walk run jump bend_knee
count 17825.000000 4325.000000 4500.000000 4500.000000 4500.000000
mean -2.986353 -3.069930 -3.155088 -0.715596 -5.008048
std 4.853279 3.914733 4.865273 5.679784 3.657334
min -58.355330 -18.453377 -19.415750 -58.355330 -18.992897
25% -6.024376 -5.089412 -6.932648 -3.226496 -7.096464
50% -3.116935 -3.239116 -3.904732 0.061258 -4.747654
75% 0.355663 -0.617560 0.901872 2.376928 -2.456784
max 34.771310 10.113854 16.223417 34.771310 5.217502
In [114]:
ay_describe_training_data = pd.DataFrame({ "Total": Total_data_used_statistical_values_training_data_store_describe['ay'],
               'walk': walk_statistical_values_training_data_store_describe['ay'],
               'run': run_statistical_values_training_data_store_describe['ay'],
               'jump': jump_statistical_values_training_data_store_describe['ay'],
               'bend_knee': bend_knee_statistical_values_training_data_store_describe['ay']

})
ay_describe_training_data
Out[114]:
Total walk run jump bend_knee
count 17825.000000 4325.000000 4500.000000 4500.000000 4500.000000
mean -8.950275 -9.333466 -10.370162 -9.828769 -6.283606
std 7.784934 3.331278 8.646869 11.496961 3.528068
min -73.522530 -24.694840 -37.804590 -73.522530 -26.365800
25% -12.290586 -11.313857 -15.377623 -18.099513 -8.348569
50% -7.741853 -9.008615 -9.199481 -6.441858 -5.709247
75% -3.917728 -7.067923 -3.546445 -0.340767 -3.797111
max 19.047120 1.083788 4.778022 19.047120 7.628606
In [115]:
az_describe_training_data = pd.DataFrame({ "Total": Total_data_used_statistical_values_training_data_store_describe['az'],
               'walk': walk_statistical_values_training_data_store_describe['az'],
               'run': run_statistical_values_training_data_store_describe['az'],
               'jump': jump_statistical_values_training_data_store_describe['az'],
               'bend_knee': bend_knee_statistical_values_training_data_store_describe['az']

})
az_describe_training_data
Out[115]:
Total walk run jump bend_knee
count 17825.000000 4325.000000 4500.000000 4500.000000 4500.000000
mean 2.074440 0.603748 0.210956 2.870194 4.555668
std 8.915541 3.941401 7.915198 14.182450 4.882842
min -65.680550 -13.449025 -25.676895 -65.680550 -18.561280
25% -1.960957 -1.673116 -3.924741 -3.593546 1.554194
50% 1.317041 -0.078214 -0.329322 0.757861 4.018442
75% 5.650498 2.580815 4.436671 7.461750 7.929246
max 77.439890 33.589880 69.153336 77.439890 28.570288
In [116]:
gx_describe_training_data = pd.DataFrame({ "Total": Total_data_used_statistical_values_training_data_store_describe['gx'],
               'walk': walk_statistical_values_training_data_store_describe['gx'],
               'run': run_statistical_values_training_data_store_describe['gx'],
               'jump': jump_statistical_values_training_data_store_describe['gx'],
               'bend_knee': bend_knee_statistical_values_training_data_store_describe['gx']

})
gx_describe_training_data
Out[116]:
Total walk run jump bend_knee
count 17825.000000 4325.000000 4500.000000 4500.000000 4500.000000
mean -0.031268 -0.072456 -0.033843 -0.007783 -0.012592
std 1.941447 1.116711 1.673507 2.507901 2.154415
min -9.467172 -2.630403 -4.784052 -9.467172 -3.821800
25% -1.291544 -0.836676 -1.100696 -1.587028 -2.084477
50% -0.201044 -0.282831 -0.296401 0.103542 -0.122766
75% 1.384884 0.446839 1.062046 1.564736 2.004642
max 15.596314 3.980537 5.208708 15.596314 5.096135
In [117]:
gy_describe_training_data = pd.DataFrame({ "Total": Total_data_used_statistical_values_training_data_store_describe['gy'],
               'walk': walk_statistical_values_training_data_store_describe['gy'],
               'run': run_statistical_values_training_data_store_describe['gy'],
               'jump': jump_statistical_values_training_data_store_describe['gy'],
               'bend_knee': bend_knee_statistical_values_training_data_store_describe['gy']

})
gy_describe_training_data
Out[117]:
Total walk run jump bend_knee
count 17825.000000 4325.000000 4500.000000 4500.000000 4500.000000
mean 0.095956 0.000396 0.277159 0.069305 0.033249
std 2.518209 1.371160 3.341670 3.248618 1.244822
min -24.216618 -5.044455 -8.252998 -24.216618 -5.858616
25% -1.093938 -0.907466 -1.797942 -1.422661 -0.637879
50% 0.033458 0.069290 -0.293163 0.116780 0.085556
75% 1.021890 0.800635 1.967501 1.563222 0.599124
max 16.606546 4.611177 12.455402 16.606546 9.463210
In [118]:
gz_describe_training_data= pd.DataFrame({ "Total": Total_data_used_statistical_values_training_data_store_describe['gz'],
               'walk': walk_statistical_values_training_data_store_describe['gz'],
               'run': run_statistical_values_training_data_store_describe['gz'],
               'jump': jump_statistical_values_training_data_store_describe['gz'],
               'bend_knee': bend_knee_statistical_values_training_data_store_describe['gz']

})
gz_describe_training_data
Out[118]:
Total walk run jump bend_knee
count 17825.000000 4325.000000 4500.000000 4500.000000 4500.000000
mean 0.043261 0.078736 0.078657 -0.019031 0.036062
std 1.561716 0.772815 2.241965 1.460084 1.387055
min -5.364095 -2.700984 -4.036597 -5.364095 -4.259668
25% -0.876417 -0.486580 -1.491318 -0.747045 -1.048973
50% -0.076044 -0.074351 -0.582800 0.011868 0.080582
75% 0.817460 0.556044 1.272764 0.737327 0.978384
max 10.029377 3.627894 9.648895 10.029377 4.653065

describe() tables for ax, ay, az, gx, gy, gz (testing data)

In [119]:
ax_describe_testing_data = pd.DataFrame({ "Total": Total_data_used_statistical_values_testing_data_store_describe['ax'],
               'walk': walk_statistical_values_testing_data_store_describe['ax'],
               'run': run_statistical_values_testing_data_store_describe['ax'],
               'jump': jump_statistical_values_testing_data_store_describe['ax'],
               'bend_knee': bend_knee_statistical_values_testing_data_store_describe['ax']
})
ax_describe_testing_data
Out[119]:
Total walk run jump bend_knee
count 3600.000000 900.000000 900.000000 900.000000 900.000000
mean -3.146436 -4.602216 -3.654827 -0.118211 -4.210491
std 4.588190 4.267751 4.559622 4.676360 3.272579
min -22.241808 -21.761130 -16.515050 -22.241808 -16.586464
25% -5.784240 -6.763136 -7.165489 -2.868771 -5.946213
50% -3.168965 -4.378665 -3.646460 0.277612 -3.780871
75% -0.084896 -2.072562 -0.051231 2.275090 -1.946259
max 17.217682 4.543543 8.170855 17.217682 3.793411
In [120]:
ay_describe_testing_data = pd.DataFrame({ "Total": Total_data_used_statistical_values_testing_data_store_describe['ay'],
               'walk': walk_statistical_values_testing_data_store_describe['ay'],
               'run': run_statistical_values_testing_data_store_describe['ay'],
               'jump': jump_statistical_values_testing_data_store_describe['ay'],
               'bend_knee': bend_knee_statistical_values_testing_data_store_describe['ay']

})
ay_describe_testing_data
Out[120]:
Total walk run jump bend_knee
count 3600.000000 900.000000 900.000000 900.000000 900.000000
mean -8.909527 -9.198835 -9.961343 -10.076424 -6.401505
std 7.544367 3.828462 6.618733 12.195051 3.438623
min -72.455290 -22.758114 -34.368008 -72.455290 -20.463634
25% -11.836035 -11.204503 -13.280773 -18.104933 -8.598337
50% -7.640913 -8.943023 -9.376551 -5.582153 -6.028432
75% -4.115906 -6.191834 -5.318389 -0.743720 -3.965957
max 16.545313 -0.317241 1.344201 16.545313 5.208566
In [121]:
az_describe_testing_data = pd.DataFrame({ "Total": Total_data_used_statistical_values_testing_data_store_describe['az'],
               'walk': walk_statistical_values_testing_data_store_describe['az'],
               'run': run_statistical_values_testing_data_store_describe['az'],
               'jump': jump_statistical_values_testing_data_store_describe['az'],
               'bend_knee': bend_knee_statistical_values_testing_data_store_describe['az']

})
az_describe_testing_data
Out[121]:
Total walk run jump bend_knee
count 3600.000000 900.000000 900.000000 900.000000 900.000000
mean 2.227296 0.104735 0.087934 3.211690 5.504825
std 8.876328 4.989880 6.154609 14.422964 4.878855
min -54.965710 -13.299868 -16.662685 -54.965710 -10.618936
25% -2.239646 -2.415948 -3.640242 -3.596412 2.395557
50% 1.244525 -0.729532 -0.404673 1.155107 4.934819
75% 5.800447 1.551503 3.578123 7.574031 8.894357
max 75.672134 35.974224 51.412820 75.672134 18.733679
In [122]:
gx_describe_testing_data = pd.DataFrame({ "Total": Total_data_used_statistical_values_testing_data_store_describe['gx'],
               'walk': walk_statistical_values_testing_data_store_describe['gx'],
               'run': run_statistical_values_testing_data_store_describe['gx'],
               'jump': jump_statistical_values_testing_data_store_describe['gx'],
               'bend_knee': bend_knee_statistical_values_testing_data_store_describe['gx']

})
gx_describe_testing_data
Out[122]:
Total walk run jump bend_knee
count 3600.000000 900.000000 900.000000 900.000000 900.000000
mean -0.012130 -0.107253 -0.011950 -0.027394 0.098077
std 1.966366 1.278311 1.299093 2.664203 2.244579
min -8.880236 -3.335586 -2.389984 -8.880236 -3.989159
25% -1.277027 -0.961659 -0.945310 -2.111949 -2.130388
50% -0.272053 -0.333410 -0.421733 0.005236 0.230733
75% 1.431523 0.642700 0.925064 1.675451 2.199346
max 12.783716 3.923971 3.732369 12.783716 5.356031
In [123]:
gy_describe_testing_data = pd.DataFrame({ "Total": Total_data_used_statistical_values_testing_data_store_describe['gy'],
               'walk': walk_statistical_values_testing_data_store_describe['gy'],
               'run': run_statistical_values_testing_data_store_describe['gy'],
               'jump': jump_statistical_values_testing_data_store_describe['gy'],
               'bend_knee': bend_knee_statistical_values_testing_data_store_describe['gy']

})
gy_describe_testing_data
Out[123]:
Total walk run jump bend_knee
count 3600.000000 900.000000 900.000000 900.000000 900.000000
mean 0.109259 -0.025419 0.219210 0.227343 0.015901
std 2.119607 1.623782 2.271544 3.032247 0.970507
min -20.100677 -5.616225 -5.328996 -20.100677 -2.868885
25% -1.039195 -1.058224 -1.437340 -1.435983 -0.493330
50% 0.058032 0.164401 -0.168704 0.118630 0.043738
75% 1.058581 1.051609 1.441585 1.705309 0.479150
max 10.861376 4.877096 8.195647 10.861376 4.152627
In [124]:
gz_describe_testing_data = pd.DataFrame({ "Total": Total_data_used_statistical_values_testing_data_store_describe['gz'],
               'walk': walk_statistical_values_testing_data_store_describe['gz'],
               'run': run_statistical_values_testing_data_store_describe['gz'],
               'jump': jump_statistical_values_testing_data_store_describe['gz'],
               'bend_knee': bend_knee_statistical_values_testing_data_store_describe['gz']

})
gz_describe_testing_data
Out[124]:
Total walk run jump bend_knee
count 3600.000000 900.000000 900.000000 900.000000 900.000000
mean 0.060217 0.113245 0.086860 -0.037803 0.078566
std 1.326979 0.832404 1.603129 1.476088 1.262589
min -4.693068 -1.212149 -2.934509 -4.693068 -2.793888
25% -0.803118 -0.550900 -1.192771 -1.012736 -0.896427
50% 0.021660 -0.116667 -0.148091 0.271704 0.126196
75% 0.862476 0.576757 1.008888 0.889394 0.929353
max 5.727054 2.659533 5.386732 5.727054 3.286909
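
The per-channel cells above all repeat the same pattern; as a sketch, the same tables could be built in one loop, reusing the *_describe DataFrames computed earlier:

In [ ]:
# Build one describe table per channel from the already-computed
# per-class describe DataFrames (testing data shown; training is analogous).
sources = {
    'Total': Total_data_used_statistical_values_testing_data_store_describe,
    'walk': walk_statistical_values_testing_data_store_describe,
    'run': run_statistical_values_testing_data_store_describe,
    'jump': jump_statistical_values_testing_data_store_describe,
    'bend_knee': bend_knee_statistical_values_testing_data_store_describe,
}
describe_testing_tables = {
    ch: pd.DataFrame({name: df[ch] for name, df in sources.items()})
    for ch in ['ax', 'ay', 'az', 'gx', 'gy', 'gz']
}
describe_testing_tables['az']  # same table as Out[121] above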

Correlation¶


Q8: Calculate the correlation matrix between all features. From this result, discuss whether all features are useful for the classification.

Correlation matrix

In [125]:
import seaborn as sns
import matplotlib.pyplot as plt
In [126]:
training_data_store.corr()
Out[126]:
ax ay az gx gy gz
ax 1.000000 0.115509 -0.285243 -0.025989 -0.079320 -0.101888
ay 0.115509 1.000000 -0.116625 0.103438 -0.232466 -0.229022
az -0.285243 -0.116625 1.000000 0.061195 -0.118061 -0.018629
gx -0.025989 0.103438 0.061195 1.000000 0.029655 0.476902
gy -0.079320 -0.232466 -0.118061 0.029655 1.000000 0.300157
gz -0.101888 -0.229022 -0.018629 0.476902 0.300157 1.000000
In [127]:
sns.heatmap(training_data_store.corr(), annot=True)
plt.show()
In [128]:
testing_data_store.corr()
Out[128]:
ax ay az gx gy gz
ax 1.000000 0.139210 -0.327466 -0.107332 -0.094760 -0.156551
ay 0.139210 1.000000 -0.177498 0.126681 -0.107738 0.005062
az -0.327466 -0.177498 1.000000 0.088376 -0.077080 -0.001062
gx -0.107332 0.126681 0.088376 1.000000 0.067455 0.618611
gy -0.094760 -0.107738 -0.077080 0.067455 1.000000 0.156800
gz -0.156551 0.005062 -0.001062 0.618611 0.156800 1.000000
In [129]:
sns.heatmap(testing_data_store.corr(), annot=True)
plt.show()

Answer 8: The pairwise correlations are low to moderate: the largest magnitudes are between gx and gz (about 0.48 in training and 0.62 in testing). No feature is simply a copy of another, so each axis of the accelerometer and the gyroscope contributes mostly independent information, and all six features are worth keeping. The moderate gx-gz correlation does suggest that one of those two is partly redundant.
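
To make the visual check concrete, the feature pairs with high correlation can be listed programmatically; a minimal sketch (the 0.4 threshold is an arbitrary choice for illustration):

In [ ]:
# List feature pairs whose absolute correlation exceeds a threshold.
corr = training_data_store.corr()
cols = corr.columns
for i in range(len(cols)):
    for j in range(i + 1, len(cols)):
        if abs(corr.iloc[i, j]) > 0.4:
            print(cols[i], cols[j], round(corr.iloc[i, j], 3))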

Q9: Calculate the autocorrelation for all classes. Do you see a pattern? Explain what you see.

Autocorrelation plot function

In [130]:
# Per-class subsets without the label column, selected on the class_ value.

training_data_store_bend_knee = training_data_store[training_data_store['class_'] == 'bend_knee'].drop(['class_'], axis='columns')
training_data_store_jump = training_data_store[training_data_store['class_'] == 'jumping'].drop(['class_'], axis='columns')
training_data_store_walk = training_data_store[training_data_store['class_'] == 'walking'].drop(['class_'], axis='columns')
training_data_store_run = training_data_store[training_data_store['class_'] == 'running'].drop(['class_'], axis='columns')


testing_data_store_bend_knee = testing_data_store[testing_data_store['class_'] == 'bend_knee'].drop(['class_'], axis='columns')
testing_data_store_jump = testing_data_store[testing_data_store['class_'] == 'jumping'].drop(['class_'], axis='columns')
testing_data_store_walk = testing_data_store[testing_data_store['class_'] == 'walking'].drop(['class_'], axis='columns')
testing_data_store_run = testing_data_store[testing_data_store['class_'] == 'running'].drop(['class_'], axis='columns')
In [131]:
# Autocorrelation plot for the running training data.
# Note: passing the whole DataFrame pools all six channels into one curve;
# a single column (e.g. training_data_store_run['ay']) is easier to interpret.
pd.plotting.autocorrelation_plot(training_data_store_run)
plt.show()
In [132]:
# Autocorrelation plot for the walking training data
pd.plotting.autocorrelation_plot(training_data_store_walk)
plt.show()
In [133]:
# Autocorrelation plot for the jumping training data
pd.plotting.autocorrelation_plot(training_data_store_jump)
plt.show()
In [134]:
# Autocorrelation plot for the bend_knee training data
pd.plotting.autocorrelation_plot(training_data_store_bend_knee)
plt.show()
In [135]:
# Autocorrelation plot for the running testing data
pd.plotting.autocorrelation_plot(testing_data_store_run)
plt.show()
In [136]:
# Autocorrelation plot for the walking testing data
pd.plotting.autocorrelation_plot(testing_data_store_walk)
plt.show()
In [137]:
# Autocorrelation plot for the jumping testing data
pd.plotting.autocorrelation_plot(testing_data_store_jump)
plt.show()
In [138]:
# Autocorrelation plot for the bend_knee testing data
pd.plotting.autocorrelation_plot(testing_data_store_bend_knee)
plt.show()

Answer 9: All the autocorrelation plots show an oscillation whose amplitude decays towards zero as the lag increases. The oscillation appears because each activity is roughly periodic: every stride, jump or knee bend repeats a similar motion. The decay reflects that the movement is not perfectly regular, so samples far apart in time become less and less correlated. The approximate period of the oscillation differs per class (a sketch of how such a period can be estimated programmatically follows below):

training_data_store_run is about 0.75 s
training_data_store_walk is about 0.95 s
training_data_store_jump is about 0.65 s
training_data_store_bend_knee is about 1.0 s

testing_data_store_run is about 0.75 s
testing_data_store_walk is about 0.95 s
testing_data_store_jump is about 0.65 s
testing_data_store_bend_knee is about 1.0 s
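
As a sketch of how such a period can be read off programmatically: compute the autocorrelation of one channel, find the lag of its strongest peak, and convert from samples to seconds using the 100 Hz sampling rate. The choice of the ay channel and the 2-second search window are my own choices for illustration:

In [ ]:
# Estimate the dominant period of the running class from the ay channel.
fs = 100  # Hz, the sampling rate used for the recordings

sig = training_data_store_run['ay'].to_numpy()
sig = sig - sig.mean()

# Autocorrelation for non-negative lags, normalized to 1 at lag 0.
acf = np.correlate(sig, sig, mode='full')[len(sig) - 1:]
acf = acf / acf[0]

# Lag of the strongest peak within the first two seconds (lag > 0).
lag = int(np.argmax(acf[1:2 * fs])) + 1
print(f'estimated period: {lag / fs:.2f} s')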

Classification¶

Q10: You need to maximize the performance for each classifier by changing values of relevant hyperparameters.

In [139]:
import tensorflow as tf 
import numpy as np 
import matplotlib.pyplot as plt 
In [140]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import OneHotEncoder
from scipy.special import softmax
onehot_encoder = OneHotEncoder(sparse=False)  # on scikit-learn >= 1.2, use sparse_output=False instead


def loss(X, Y, W):
    """
    Y: onehot encoded
    """
    Z = -X @ W
    N = X.shape[0]
    loss = 1/N * (np.trace(X @ W @ Y.T) + np.sum(np.log(np.sum(np.exp(Z), axis=1))))
    return loss


def gradient(X, Y, W, mu):
    """
    Y: onehot encoded
    """
    Z = - X @ W
    P = softmax(Z, axis=1)
    N = X.shape[0]
    gd = 1/N * (X.T @ (Y - P)) + 2 * mu * W
    return gd


def gradient_descent(X, Y, max_iter=1000, eta=0.1, mu=0.01):
    """
    Very basic gradient descent with fixed step size eta and
    regularization strength mu
    """
    Y_onehot = onehot_encoder.fit_transform(Y.reshape(-1, 1))
    W = np.zeros((X.shape[1], Y_onehot.shape[1]))
    step = 0
    step_lst = []
    loss_lst = []
    W_lst = []
    
    while step < max_iter:
        step += 1
        W -= eta * gradient(X, Y_onehot, W, mu)
        step_lst.append(step)
        W_lst.append(W.copy())  # copy, otherwise every entry points to the same array
        loss_lst.append(loss(X, Y_onehot, W))
        
    df = pd.DataFrame({
        'step': step_lst,
        'loss': loss_lst
    })
    return df, W


class Multiclass:

    def fit(self, X, Y):
        self.loss_steps, self.W = gradient_descent(X, Y)
   
    def loss_plot(self):
        return self.loss_steps.plot(
            x='step',
            y='loss',
            xlabel='step',
            ylabel='loss'
        )
    
    def predict(self, H):
        Z = - H @ self.W
        P = softmax(Z, axis=1)
        return np.argmax(P, axis=1)
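
As a sanity check on the implementation above, the analytic gradient can be compared against a finite-difference approximation of loss(); a minimal sketch on toy data (mu is set to 0 because loss() does not include the regularization term):

In [ ]:
# Finite-difference sanity check of gradient() against loss().
labels = np.tile(np.arange(4), 5)  # 20 toy labels covering all 4 classes
Yc = onehot_encoder.fit_transform(labels.reshape(-1, 1))
rng = np.random.default_rng(0)
Xc = rng.normal(size=(20, 6))
Wc = rng.normal(size=(6, 4))

eps = 1e-6
num = np.zeros_like(Wc)
for i in range(Wc.shape[0]):
    for j in range(Wc.shape[1]):
        Wp, Wm = Wc.copy(), Wc.copy()
        Wp[i, j] += eps
        Wm[i, j] -= eps
        num[i, j] = (loss(Xc, Yc, Wp) - loss(Xc, Yc, Wm)) / (2 * eps)

# The difference should be tiny (around 1e-8 or smaller).
print(np.max(np.abs(num - gradient(Xc, Yc, Wc, mu=0.0))))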
        
In [141]:
testing_data_store
testing_data_store.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3600 entries, 0 to 3599
Data columns (total 7 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   ax      3600 non-null   float64
 1   ay      3600 non-null   float64
 2   az      3600 non-null   float64
 3   gx      3600 non-null   float64
 4   gy      3600 non-null   float64
 5   gz      3600 non-null   float64
 6   class_  3600 non-null   object 
dtypes: float64(6), object(1)
memory usage: 197.0+ KB
In [142]:
pd.api.types.is_string_dtype(testing_data_store['class_'])
Out[142]:
True
In [143]:
for label, content in testing_data_store.items():
    if pd.api.types.is_string_dtype(content):
        print(label)
class_
In [144]:
for label, content in testing_data_store.items():
    if pd.api.types.is_string_dtype(content):
        testing_data_store[label] = content.astype('category').cat.as_ordered()
In [145]:
testing_data_store['class_'].cat.codes
Out[145]:
0       2
1       2
2       2
3       2
4       2
       ..
3595    0
3596    0
3597    0
3598    0
3599    0
Length: 3600, dtype: int8
In [146]:
testing_data_store['class_']
Out[146]:
0         running
1         running
2         running
3         running
4         running
          ...    
3595    bend_knee
3596    bend_knee
3597    bend_knee
3598    bend_knee
3599    bend_knee
Name: class_, Length: 3600, dtype: category
Categories (4, object): ['bend_knee' < 'jumping' < 'running' < 'walking']
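
For reference, the integer codes produced below map to the labels in category order; a quick way to print the mapping while the column is still categorical:

In [ ]:
# Integer code -> label mapping implied by the category order above.
mapping = dict(enumerate(testing_data_store['class_'].cat.categories))
print(mapping)  # {0: 'bend_knee', 1: 'jumping', 2: 'running', 3: 'walking'}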
In [147]:
testing_data_store['class_'] = testing_data_store['class_'].cat.codes
In [148]:
testing_data_store
Out[148]:
ax ay az gx gy gz class_
0 -1.303614 -6.411490 4.077556 -0.421829 -0.733195 -0.274802 2
1 -0.998066 -6.549138 4.557804 -0.496127 -0.755675 -0.248011 2
2 -0.687327 -6.553696 4.994870 -0.511329 -0.774472 -0.243334 2
3 -0.329969 -6.566999 6.440067 -0.487907 -0.773408 -0.255150 2
4 -0.195836 -6.753056 6.938913 -0.338978 -0.745797 -0.273982 2
... ... ... ... ... ... ... ...
3595 -9.171450 -3.374264 8.320090 0.772116 0.337809 0.808978 0
3596 -10.878668 -3.192045 8.108212 0.496721 0.239599 0.469790 0
3597 -10.963327 -3.435191 8.807771 0.264854 -0.078453 -0.017017 0
3598 -9.955962 -3.697164 9.846040 0.071157 -0.217381 -0.255795 0
3599 -9.054172 -3.656607 12.560192 -0.259478 -0.084963 -0.418268 0

3600 rows × 7 columns

In [149]:
testing_data_store.shape
Out[149]:
(3600, 7)
In [150]:
new_df = testing_data_store.dropna()

X = new_df.drop('class_', axis='columns').values
y = new_df['class_'].values

model = Multiclass()
model.fit(X, y)
In [151]:
model.loss_plot();
In [152]:
model.predict(X)
Out[152]:
array([1, 1, 1, ..., 0, 0, 0], dtype=int64)
In [153]:
y
Out[153]:
array([2, 2, 2, ..., 0, 0, 0], dtype=int8)
In [154]:
model.predict(X) == y
Out[154]:
array([False, False, False, ...,  True,  True,  True])
In [155]:
score = np.mean(model.predict(X) == y)
print(f'Softmax Score for testing data: {score}')
Softmax Score for testing data: 0.4363888888888889
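
The softmax score above is fairly low. One plausible reason is that plain gradient descent with a fixed step size is sensitive to feature scale, and the accelerometer channels span a much wider range than the gyroscope channels. A minimal sketch that standardizes the features before fitting (whether this actually improves the score here is not verified):

In [ ]:
from sklearn.preprocessing import StandardScaler

# Standardize each feature to zero mean and unit variance before fitting.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

model_scaled = Multiclass()
model_scaled.fit(X_scaled, y)
print('Softmax Score (scaled):', np.mean(model_scaled.predict(X_scaled) == y))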
In [156]:
from sklearn.tree import DecisionTreeClassifier # Import Decision Tree Classifier
from sklearn import metrics # Import scikit-learn metrics module for accuracy calculation
from sklearn.model_selection import train_test_split # needed for the split below


X = new_df.drop('class_', axis='columns').values
y = new_df['class_']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=1) # 70% training and 30% test


# Create Decision Tree classifer object
clf = DecisionTreeClassifier()

# Train Decision Tree Classifer
clf = clf.fit(X_train, y_train)

#Predict the response for test dataset
y_pred = clf.predict(X_test)

# Model Accuracy, how often is the classifier correct?
print("DecisionTree Accuracy:",metrics.accuracy_score(y_test, y_pred))
DecisionTree Accuracy: 0.812962962962963
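
Accuracy alone hides which classes the tree confuses; a minimal sketch showing the per-class behaviour on the same split with a confusion matrix:

In [ ]:
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay

# Rows are true classes, columns are predicted classes.
cm_tree = confusion_matrix(y_test, y_pred)
ConfusionMatrixDisplay(cm_tree).plot()
plt.show()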

Training data

In [157]:
for label, content in training_data_store.items():
    if pd.api.types.is_string_dtype(content):
        print(label)
class_
In [158]:
for label, content in training_data_store.items():
    if pd.api.types.is_string_dtype(content):
        training_data_store[label] = content.astype('category').cat.as_ordered()
In [159]:
training_data_store['class_'] = training_data_store['class_'].cat.codes
In [160]:
new_df = training_data_store.dropna()

X = new_df.drop('class_', axis='columns').values
y = new_df['class_'].values

model = Multiclass()
model.fit(X, y)
In [161]:
model.loss_plot();
In [162]:
model.predict(X)
Out[162]:
array([1, 3, 3, ..., 2, 2, 2], dtype=int64)
In [163]:
y
Out[163]:
array([2, 2, 2, ..., 0, 0, 0], dtype=int8)
In [164]:
model.predict(X) == y
Out[164]:
array([False, False, False, ..., False, False, False])
In [165]:
# Note: X_train and y_train here are still the ones from the split of
# testing_data_store in the decision-tree cell above, while this model was
# fitted on the full training data.
correct = (model.predict(X_train) == y_train)
print(correct.sum())
print(len(correct))

score = correct.mean()
print(f'Softmax Score for training data: {score}')
748
2520
Softmax Score for training data: 0.2968253968253968
In [166]:
from sklearn.tree import DecisionTreeClassifier # Import Decision Tree Classifier
from sklearn import metrics #Import scikit-learn metrics module for accuracy calculation


X = new_df.drop('class_', axis='columns').values
y = new_df['class_']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=1) # 70% training and 30% test


# Create Decision Tree classifer object
clf = DecisionTreeClassifier()

# Train Decision Tree Classifer
clf = clf.fit(X_train, y_train)

#Predict the response for test dataset
y_pred = clf.predict(X_test)

# Model Accuracy, how often is the classifier correct?
print("DecisionTree Accuracy:",metrics.accuracy_score(y_test, y_pred))
DecisionTree Accuracy: 0.8305908750934929

Answer 10: I used softmax for the training and the testing data sets. At first I used model.fit(X, y) for both data sets; then I used model.fit(X_train, y_train) to improve the accuracy. For the training data set my test_size was 0.3; for the testing data set my test_size was 0.1.

Score for testing data
Softmax Accuracy: 0.4363888888888889
DecisionTree Accuracy: 0.812962962962963

Score for training data
Softmax Accuracy: 0.2968253968253968
DecisionTree Accuracy: 0.8305908750934929
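
Since Q10 asks for tuning the hyperparameters, here is a minimal sketch of a systematic search over the decision tree's hyperparameters with GridSearchCV; the parameter grid is an arbitrary starting point, not a tuned choice:

In [ ]:
from sklearn.model_selection import GridSearchCV
from sklearn.tree import DecisionTreeClassifier

# Cross-validated grid search over a few common decision-tree hyperparameters.
param_grid = {
    'max_depth': [3, 5, 10, None],
    'min_samples_leaf': [1, 5, 20],
    'criterion': ['gini', 'entropy'],
}
search = GridSearchCV(DecisionTreeClassifier(random_state=1), param_grid, cv=5)
search.fit(X_train, y_train)
print(search.best_params_)
print('cross-validated accuracy:', search.best_score_)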

Q11: Change the number of features for your classification. How does the performance change with the choice of the features? Use your results from preprocessing and correlation.

In [167]:
from sklearn.tree import DecisionTreeClassifier # Import Decision Tree Classifier
from sklearn import metrics #Import scikit-learn metrics module for accuracy calculation

new_df = training_data_store.dropna()

X = new_df.drop(['class_', 'ax', 'ay', 'az'], axis='columns').values
y = new_df['class_'].values

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=1) # 70% training and 30% test


# Create Decision Tree classifer object
clf = DecisionTreeClassifier()

# Train Decision Tree Classifer
clf = clf.fit(X_train, y_train)

#Predict the response for test dataset
y_pred = clf.predict(X_test)

# Model Accuracy, how often is the classifier correct?
print("DecisionTree Accuracy:",metrics.accuracy_score(y_test, y_pred))
DecisionTree Accuracy: 0.56245325355273
In [168]:
from sklearn.tree import DecisionTreeClassifier # Import Decision Tree Classifier
from sklearn import metrics #Import scikit-learn metrics module for accuracy calculation

new_df = testing_data_store.dropna()

X = new_df.drop(['class_', 'ax', 'ay', 'az'], axis='columns').values
y = new_df['class_'].values

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=1) # 70% training and 30% test


# Create Decision Tree classifer object
clf = DecisionTreeClassifier()

# Train Decision Tree Classifer
clf = clf.fit(X_train, y_train)

#Predict the response for test dataset
y_pred = clf.predict(X_test)

# Model Accuracy, how often is the classifier correct?
print("DecisionTree Accuracy:",metrics.accuracy_score(y_test, y_pred))
DecisionTree Accuracy: 0.5916666666666667

Answer 11: In the two cells above I used only the three gyroscope features ['gx', 'gy', 'gz'] for the training and the testing data sets. I got a clearly lower score for both data sets after dropping ['ax', 'ay', 'az'], so the accelerometer features carry important information for the classification (see the feature-importance sketch below).

Training data
Before dropping ['ax', 'ay', 'az']: DecisionTree Accuracy: 0.8305908750934929

After dropping ['ax', 'ay', 'az']: DecisionTree Accuracy: 0.56245325355273

Testing data
Before dropping ['ax', 'ay', 'az']: DecisionTree Accuracy: 0.812962962962963

After dropping ['ax', 'ay', 'az']: DecisionTree Accuracy: 0.5916666666666667
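
A minimal sketch that prints the feature importances of a tree trained on all six channels, using the current new_df (i.e. the testing data):

In [ ]:
# Feature importances of a tree trained on all six channels.
feature_names = ['ax', 'ay', 'az', 'gx', 'gy', 'gz']
clf_all = DecisionTreeClassifier(random_state=1)
clf_all.fit(new_df[feature_names].values, new_df['class_'].values)
for name, imp in zip(feature_names, clf_all.feature_importances_):
    print(f'{name}: {imp:.3f}')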

Train, Validate and Test¶

Q12: Present the result for the two test scenarios. Reflect over the result: is it expected, reasonable, etc.?

In [169]:
def read_all_files(accelerometer, gyroscope, clas):
    """Combine accelerometer and gyroscope channels side by side and tag the class."""
    return pd.concat([accelerometer, gyroscope], axis=1).dropna().assign(class_=clas)

all_movement_1 = read_all_files(all_in_one_1_accelerometer, all_in_one_1_gyroscope, None)

X_ = all_movement_1.drop(['class_'], axis='columns')
y = all_movement_1['class_']
# y = all_movement_1['class_'].cat.codes
X_.plot();
# y
In [170]:
y_train
Out[170]:
array([2, 0, 0, ..., 3, 2, 3], dtype=int8)
In [171]:
X_
Out[171]:
ax ay az gx gy gz
0 -4.365023 -7.358100 5.076733 0.008395 0.038031 0.006039
1 -4.212665 -7.304776 5.026167 -0.107093 -0.042987 -0.018553
2 -4.316746 -7.281084 5.050798 -0.131929 -0.060144 -0.038170
3 -4.555965 -7.415896 5.017136 -0.162665 -0.092153 -0.052569
4 -4.602786 -7.428959 5.099611 -0.220889 -0.126659 -0.059306
... ... ... ... ... ... ...
2954 -4.530768 -6.012664 6.846249 -0.305258 -0.123342 -0.004782
2955 -4.778032 -6.198387 6.809713 -0.264557 -0.106919 0.009669
2956 -5.042572 -6.267522 6.628271 -0.213454 -0.062535 0.047508
2957 -4.865793 -6.207619 5.935216 -0.225060 0.018658 0.142541
2958 -4.790702 -6.241071 5.859234 -0.237452 0.035273 0.175510

2959 rows × 6 columns

In [172]:
new_df = training_data_store.dropna()
new_df
Out[172]:
ax ay az gx gy gz class_
0 3.536667 -8.764157 8.219295 -0.426663 0.376293 0.468237 2
1 3.972804 -12.131095 11.188315 -0.133465 -0.126467 0.777247 2
2 3.418738 -14.802985 12.352308 0.195966 -0.441429 0.986390 2
3 1.820551 -16.844654 8.020269 0.370743 -0.536392 1.180035 2
4 0.771038 -16.254522 3.320959 -0.066549 -0.279340 1.371567 2
... ... ... ... ... ... ... ...
17820 -4.225421 -7.347326 2.834419 1.315053 0.989951 0.788295 0
17821 -3.888998 -6.893213 2.058209 1.380154 1.288926 0.862489 0
17822 -4.683547 -6.589226 1.510789 1.444906 1.497039 0.967244 0
17823 -5.238657 -6.579218 0.651558 1.428796 1.569016 1.079469 0
17824 -5.052723 -6.786039 -0.916491 1.325403 1.399684 1.295959 0

17825 rows × 7 columns

In [173]:
n_neighbors = 4

from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import confusion_matrix

new_df = training_data_store.dropna()

X = new_df.drop(['class_'], axis='columns')
y = new_df['class_']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=1) # 70% training and 30% test


knn = KNeighborsClassifier(n_neighbors)
knn.fit(X_train, y_train)

# Predict on the unlabelled all-in-one recording
y_pred = knn.predict(X_)
print(len(y_pred))
print(len(y_test[:2959]))

# Note: y_test[:2959] does not correspond row-for-row to X_ (it comes from a
# random split of the training store), so this confusion matrix is only a
# rough indication, not a true evaluation.
cm = confusion_matrix(y_test[:2959], y_pred)

f, ax = plt.subplots(figsize=(5, 5))

sns.heatmap(cm, annot=True, linewidths=0.5, linecolor="red", fmt=".0f", ax=ax);
plt.xlabel("y_pred");
plt.ylabel("y_true");
2959
2959

Answer 12: I used the file with all classes included; its total length is 2959 samples. Reading each class's accuracy from the confusion matrix as the diagonal count divided by the row total:

The accuracy for class 0 is: 185 / (185 + 240 + 196 + 161) = 0.237
The accuracy for class 1 is: 207 / (164 + 207 + 206 + 149) = 0.285
The accuracy for class 2 is: 172 / (180 + 224 + 172 + 164) = 0.232
The accuracy for class 3 is: 149 / (165 + 236 + 161 + 149) = 0.210

Class 1 is predicted best, but all four classes are close to the 25 % chance level. This is expected here, because the labels in y_test[:2959] do not actually line up with the rows of the all-in-one recording, so the comparison is close to random. (The same numbers can be recomputed from the confusion matrix, as sketched below.)
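
A minimal sketch that recomputes these per-class numbers directly from the confusion matrix:

In [ ]:
# Per-class accuracy: diagonal count divided by the row total.
per_class = cm.diagonal() / cm.sum(axis=1)
for k, acc in enumerate(per_class):
    print(f'class {k}: {acc:.3f}')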
In [ ]: