Laboration part 3
Preparations:
Be sure that you have done the preparations. You need to make several new recordings with the accelerometer and rate gyro as active sensors. You also need to record at a sampling rate of at least 100 Hz.
Q1: Which features seem most useful? Motivate your answer. Insert your code below, and the answer below the code.
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import scipy.stats as stats
# plot_confusion_matrix was deprecated in scikit-learn 1.0 and removed in 1.2,
# so importing it breaks the notebook on current sklearn. Only
# ConfusionMatrixDisplay (its replacement) is actually used below.
from sklearn.metrics import accuracy_score, ConfusionMatrixDisplay
import warnings
# NOTE(review): silencing *all* warnings hides real problems (e.g. sklearn
# feature-name warnings); consider narrowing this filter.
warnings.filterwarnings("ignore")
Running
# Running recordings: load accelerometer + gyroscope CSVs for runs 1-6
# (columns 2-4 are the x/y/z channels) and keep samples 100:1000 to drop
# the initial handling noise and cap every recording at the same length.
# NOTE(review): the original cut run_1's accelerometer with [100:] (no stop
# index), unlike every other recording's [100:1000] -- treated as a typo
# and fixed here so all recordings have equal length.
for _i in range(1, 7):
    _acc = pd.read_csv(f"running_{_i}/Accelerometer.csv", usecols=[2, 3, 4],
                       names=['ax', 'ay', 'az'], header=0)
    _gyr = pd.read_csv(f"running_{_i}/Gyroscope.csv", usecols=[2, 3, 4],
                       names=['gx', 'gy', 'gz'], header=0)
    globals()[f"run_{_i}_accelerometer"] = _acc
    globals()[f"run_{_i}_accelerometer_cut"] = _acc[100:1000]
    globals()[f"run_{_i}_gyroscope"] = _gyr
    globals()[f"run_{_i}_gyroscope_cut"] = _gyr[100:1000]
# Quick visual checks, e.g.:
# run_1_accelerometer.plot();
# run_1_accelerometer_cut.plot();
Walking
# Walking recordings 1-6: same layout as the running data -- columns 2-4 of
# each sensor CSV, renamed to ax/ay/az and gx/gy/gz, trimmed to rows 100:1000.
for _i in range(1, 7):
    _acc = pd.read_csv(f"walking_{_i}/Accelerometer.csv", usecols=[2, 3, 4],
                       names=['ax', 'ay', 'az'], header=0)
    _gyr = pd.read_csv(f"walking_{_i}/Gyroscope.csv", usecols=[2, 3, 4],
                       names=['gx', 'gy', 'gz'], header=0)
    globals()[f"walk_{_i}_accelerometer"] = _acc
    globals()[f"walk_{_i}_accelerometer_cut"] = _acc[100:1000]
    globals()[f"walk_{_i}_gyroscope"] = _gyr
    globals()[f"walk_{_i}_gyroscope_cut"] = _gyr[100:1000]
# Quick visual checks, e.g.:
# walk_1_accelerometer.plot();
# walk_1_accelerometer_cut.plot();
Jumping
# Jumping recordings 1-6: columns 2-4 of each sensor CSV, renamed to
# ax/ay/az and gx/gy/gz, trimmed to rows 100:1000.
for _i in range(1, 7):
    _acc = pd.read_csv(f"jumping_{_i}/Accelerometer.csv", usecols=[2, 3, 4],
                       names=['ax', 'ay', 'az'], header=0)
    _gyr = pd.read_csv(f"jumping_{_i}/Gyroscope.csv", usecols=[2, 3, 4],
                       names=['gx', 'gy', 'gz'], header=0)
    globals()[f"jump_{_i}_accelerometer"] = _acc
    globals()[f"jump_{_i}_accelerometer_cut"] = _acc[100:1000]
    globals()[f"jump_{_i}_gyroscope"] = _gyr
    globals()[f"jump_{_i}_gyroscope_cut"] = _gyr[100:1000]
# Quick visual checks, e.g.:
# jump_1_accelerometer.plot();
# jump_1_accelerometer_cut.plot();
bend_knee
# Knee-bend recordings 1-6: columns 2-4 of each sensor CSV, renamed to
# ax/ay/az and gx/gy/gz, trimmed to rows 100:1000.
for _i in range(1, 7):
    _acc = pd.read_csv(f"bend_knee_{_i}/Accelerometer.csv", usecols=[2, 3, 4],
                       names=['ax', 'ay', 'az'], header=0)
    _gyr = pd.read_csv(f"bend_knee_{_i}/Gyroscope.csv", usecols=[2, 3, 4],
                       names=['gx', 'gy', 'gz'], header=0)
    globals()[f"bend_knee_{_i}_accelerometer"] = _acc
    globals()[f"bend_knee_{_i}_accelerometer_cut"] = _acc[100:1000]
    globals()[f"bend_knee_{_i}_gyroscope"] = _gyr
    globals()[f"bend_knee_{_i}_gyroscope_cut"] = _gyr[100:1000]
# Quick visual checks, e.g.:
# bend_knee_1_accelerometer.plot();
# bend_knee_1_accelerometer_cut.plot();
All movements together
# Mixed recordings (all movements in one session), 2 takes: columns 2-4 of
# each sensor CSV, renamed to ax/ay/az and gx/gy/gz, trimmed to rows 100:1000.
for _i in range(1, 3):
    _acc = pd.read_csv(f"all_in_one_{_i}/Accelerometer.csv", usecols=[2, 3, 4],
                       names=['ax', 'ay', 'az'], header=0)
    _gyr = pd.read_csv(f"all_in_one_{_i}/Gyroscope.csv", usecols=[2, 3, 4],
                       names=['gx', 'gy', 'gz'], header=0)
    globals()[f"all_in_one_{_i}_accelerometer"] = _acc
    globals()[f"all_in_one_{_i}_accelerometer_cut"] = _acc[100:1000]
    globals()[f"all_in_one_{_i}_gyroscope"] = _gyr
    globals()[f"all_in_one_{_i}_gyroscope_cut"] = _gyr[100:1000]
# Quick visual checks, e.g.:
# all_in_one_1_accelerometer.plot();
# all_in_one_1_accelerometer_cut.plot();
def read_all_files(accelerometer, gyroscope, clas):
    """Merge one recording's accelerometer and gyroscope frames side by side.

    Rows are aligned on the index, rows missing in either frame are dropped,
    and a constant ``class_`` column holding the activity label *clas* is
    appended.

    NOTE(review): despite the name, this function performs no file I/O -- it
    only combines two already-loaded DataFrames. The original also aliased
    both arguments to throwaway locals, which is removed here.
    """
    return pd.concat([accelerometer, gyroscope], axis=1).dropna().assign(class_=clas)
def training_data():
    """Return the 20 labelled training recordings as a list of DataFrames.

    Recordings 1-5 of each activity, in the fixed order running, walking,
    jumping, bend_knee, each merged and labelled by read_all_files.
    """
    env = globals()
    frames = []
    for prefix, label in (("run", "running"), ("walk", "walking"),
                          ("jump", "jumping"), ("bend_knee", "bend_knee")):
        for i in range(1, 6):
            frames.append(read_all_files(env[f"{prefix}_{i}_accelerometer_cut"],
                                         env[f"{prefix}_{i}_gyroscope_cut"],
                                         label))
    return frames
def testing_data():
    """Return the four held-out recordings (take #6 of each activity),
    merged and labelled by read_all_files, in the order running, walking,
    jumping, bend_knee."""
    env = globals()
    return [read_all_files(env[f"{prefix}_6_accelerometer_cut"],
                           env[f"{prefix}_6_gyroscope_cut"],
                           label)
            for prefix, label in (("run", "running"), ("walk", "walking"),
                                  ("jump", "jumping"), ("bend_knee", "bend_knee"))]
plotted_graphs
# Visualise the combined training and testing sets. Build each merged frame
# once -- the original called training_data()/testing_data() (and therefore
# re-read and re-concatenated every recording) separately for each plot.
_train_df = pd.concat(training_data(), ignore_index=True)
_test_df = pd.concat(testing_data(), ignore_index=True)
# Raw time-series view of all channels.
_train_df.plot();
_test_df.plot();
# Pairwise feature scatter plots, coloured by activity label.
sns.pairplot(_train_df, hue="class_", height=2.5);
sns.pairplot(_test_df, hue="class_", height=2.5);
store_data_as_csv
# Persist the merged train/test sets so later analysis can reload them
# without touching the raw sensor CSVs.
training_data_store = pd.concat(training_data(), ignore_index=True)
testing_data_store = pd.concat(testing_data(), ignore_index=True)
training_data_store.to_csv('cut_training_data_file/cut_training_data.csv', index=False)
testing_data_store.to_csv('cut_testing_data_file/cut_testing_data.csv', index=False)
training_data_store
ax | ay | az | gx | gy | gz | class_ | |
---|---|---|---|---|---|---|---|
0 | 3.536667 | -8.764157 | 8.219295 | -0.426663 | 0.376293 | 0.468237 | running |
1 | 3.972804 | -12.131095 | 11.188315 | -0.133465 | -0.126467 | 0.777247 | running |
2 | 3.418738 | -14.802985 | 12.352308 | 0.195966 | -0.441429 | 0.986390 | running |
3 | 1.820551 | -16.844654 | 8.020269 | 0.370743 | -0.536392 | 1.180035 | running |
4 | 0.771038 | -16.254522 | 3.320959 | -0.066549 | -0.279340 | 1.371567 | running |
... | ... | ... | ... | ... | ... | ... | ... |
17820 | -4.225421 | -7.347326 | 2.834419 | 1.315053 | 0.989951 | 0.788295 | bend_knee |
17821 | -3.888998 | -6.893213 | 2.058209 | 1.380154 | 1.288926 | 0.862489 | bend_knee |
17822 | -4.683547 | -6.589226 | 1.510789 | 1.444906 | 1.497039 | 0.967244 | bend_knee |
17823 | -5.238657 | -6.579218 | 0.651558 | 1.428796 | 1.569016 | 1.079469 | bend_knee |
17824 | -5.052723 | -6.786039 | -0.916491 | 1.325403 | 1.399684 | 1.295959 | bend_knee |
17825 rows × 7 columns
testing_data_store
ax | ay | az | gx | gy | gz | class_ | |
---|---|---|---|---|---|---|---|
0 | -1.303614 | -6.411490 | 4.077556 | -0.421829 | -0.733195 | -0.274802 | running |
1 | -0.998066 | -6.549138 | 4.557804 | -0.496127 | -0.755675 | -0.248011 | running |
2 | -0.687327 | -6.553696 | 4.994870 | -0.511329 | -0.774472 | -0.243334 | running |
3 | -0.329969 | -6.566999 | 6.440067 | -0.487907 | -0.773408 | -0.255150 | running |
4 | -0.195836 | -6.753056 | 6.938913 | -0.338978 | -0.745797 | -0.273982 | running |
... | ... | ... | ... | ... | ... | ... | ... |
3595 | -9.171450 | -3.374264 | 8.320090 | 0.772116 | 0.337809 | 0.808978 | bend_knee |
3596 | -10.878668 | -3.192045 | 8.108212 | 0.496721 | 0.239599 | 0.469790 | bend_knee |
3597 | -10.963327 | -3.435191 | 8.807771 | 0.264854 | -0.078453 | -0.017017 | bend_knee |
3598 | -9.955962 | -3.697164 | 9.846040 | 0.071157 | -0.217381 | -0.255795 | bend_knee |
3599 | -9.054172 | -3.656607 | 12.560192 | -0.259478 | -0.084963 | -0.418268 | bend_knee |
3600 rows × 7 columns
Answer 1: ax, ay, gx and gy are the most useful features. They measure the acceleration and angular velocity in the x and y directions, and because I moved only in the forward direction while running, walking and jumping, these channels carry most of the discriminative information about the motion.
Q2: Explain the results in your confusion matrix. Use one classifier of your own choice.
testing_data_store.isnull().values.any()
False
training_data_store.isnull().values.any()
False
from sklearn.model_selection import train_test_split

# Separate the six sensor channels (features) from the activity label.
X = training_data_store.drop(columns=['class_'])
y = training_data_store['class_']
print(X.shape)
print(y.shape)
# Hold out 20% for evaluation; fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=1)
print(len(X_train))
print(len(X_test))
print(X_train.shape)
print(y_train.shape)
(17825, 6) (17825,) 14260 3565 (14260, 6) (14260,)
Model fitting with K-cross Validation and GridSearchCV
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import GridSearchCV

# Tune k (1..99) for both weighting schemes with 10-fold CV on the training
# split. The candidate grid is loop-invariant, so it is built once -- the
# original rebuilt k_range/param_grid on every iteration.
k_range = list(range(1, 100))
param_grid = dict(n_neighbors=k_range)
for weights in ["uniform", "distance"]:
    knn = KNeighborsClassifier(weights=weights)
    # 10-fold cross-validated accuracy over all candidate k values.
    grid = GridSearchCV(knn, param_grid, cv=10, scoring='accuracy',
                        return_train_score=False, verbose=1)
    grid_search = grid.fit(X_train, y_train)
    print(f"{grid_search.best_params_}")
    accuracy = grid_search.best_score_ * 100
    print(f"Accuracy [{weights}] for our training dataset with tuning is {accuracy}\n\n")
Fitting 10 folds for each of 99 candidates, totalling 990 fits {'n_neighbors': 1} Accuracy [uniform] for our training dataset with tuning is 87.0687237026648 Fitting 10 folds for each of 99 candidates, totalling 990 fits {'n_neighbors': 4} Accuracy [distance] for our training dataset with tuning is 87.19495091164096
#grid['mean_score_test']
#means = grid.cv_results_['mean_test_score']
#plt.plot(k_range, means)
grid.best_score_
0.8719495091164096
Create KNN (K-Nearest Neighbours Classifier)
# Refit the winning configuration from the grid search above
# (distance-weighted, k=4) on the full training split.
from sklearn.neighbors import KNeighborsClassifier
knn = KNeighborsClassifier(weights='distance', n_neighbors=4)
knn.fit(X_train, y_train)
KNeighborsClassifier(n_neighbors=4, weights='distance')In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
KNeighborsClassifier(n_neighbors=4, weights='distance')
knn.score(X_test, y_test)
0.8788218793828892
# Sanity check: this row of the stored test set is labelled bend_knee.
# Passing a DataFrame with the training column names (instead of a bare
# nested list) avoids sklearn's "X does not have valid feature names"
# warning and guards against silent column-order mistakes.
sample = pd.DataFrame([[-9.955962, -3.697164, 9.846040, 0.071157, -0.217381, -0.255795]],
                      columns=['ax', 'ay', 'az', 'gx', 'gy', 'gz'])
knn.predict(sample)
array(['bend_knee'], dtype=object)
from sklearn.metrics import confusion_matrix

# Confusion matrix of the final KNN on the held-out 20% split.
y_pred = knn.predict(X_test)
cm = confusion_matrix(y_test, y_pred)
print(cm)
# Plot the matrix for the same fitted `knn` whose counts were printed above.
# The original passed `grid` (the last grid-search object), so the printed
# and plotted matrices could come from different models.
ConfusionMatrixDisplay.from_estimator(knn, X_test, y_test);
[[850 21 17 52] [ 46 704 74 55] [ 20 33 779 59] [ 21 14 20 800]]
X_train
ax | ay | az | gx | gy | gz | |
---|---|---|---|---|---|---|
1966 | 1.570903 | 1.767323 | 5.805029 | -0.934955 | -2.183075 | -1.920682 |
6584 | 3.373976 | -9.811429 | -0.365259 | -0.370149 | 0.377253 | -0.718849 |
11434 | -0.410357 | 2.805525 | -1.407302 | -1.180279 | 0.371930 | 0.524698 |
13665 | -4.804913 | -5.455974 | 1.120869 | -2.360837 | 0.023318 | -0.450382 |
801 | -6.805039 | -3.854358 | -3.486446 | -0.235427 | 0.857009 | 0.504941 |
... | ... | ... | ... | ... | ... | ... |
10955 | 0.445532 | -31.626226 | 17.553684 | 1.036185 | 2.632986 | 0.304403 |
17289 | -11.245843 | -3.027372 | 9.695894 | -1.803309 | 0.618824 | -0.661218 |
5192 | -7.766551 | -14.789760 | -1.387919 | 3.597769 | -0.839905 | 2.393161 |
12172 | 8.263492 | -11.748184 | -20.384186 | 1.199669 | 10.835231 | -0.605874 |
235 | -2.914155 | -1.697288 | 5.323143 | -0.783164 | 0.406871 | -0.246528 |
14260 rows × 6 columns
from sklearn.preprocessing import StandardScaler
# Standardise every feature to zero mean / unit variance. KNN is distance
# based, so channels with larger numeric ranges would otherwise dominate.
scaler = StandardScaler()
# print(scaler.fit(data))
# X_train = X_train.drop('class_', axis=1)
# Fit the scaler on the training split only, then apply the same transform
# to the held-out split (avoids leaking test statistics into training).
# NOTE: this rebinds X_train/X_test in place -- everything run after this
# cell sees the scaled data, not the raw sensor values.
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)
X_train = pd.DataFrame(X_train)
X_test = pd.DataFrame(X_test)
print(len(X_train))
print(len(X_test))
# Refit the same KNN configuration on the scaled features.
model=KNeighborsClassifier(n_neighbors=4, weights='distance')
model.fit(X_train,y_train)
# Accuracy on the held-out 20% split. (The printed message says "training
# dataset", but X_test/y_test are the evaluation split of the training data.)
score=model.score(X_test,y_test)
print("Accuracy for our training dataset using Standard Scaler is : {:.3f}%".format(score*100) )
14260 3565 Accuracy for our training dataset using Standard Scaler is : 92.482%
StandardScaler above refer to training datasets for Q4
testing_data
from sklearn.model_selection import train_test_split
# Repeat the split/tune/evaluate pipeline, this time on the held-out
# recording-#6 data (testing_data_store).
# NOTE(review): re-tuning hyper-parameters on the final test recordings
# leaks information; strictly, the model tuned on the training set should
# just be scored here -- worth confirming against the lab instructions.
X = testing_data_store.drop(['class_'], axis='columns')
y = testing_data_store['class_']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=1)
print(len(X_train))
print(len(X_test))
2880 720
Model fitting with K-cross Validation and GridSearchCV
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import GridSearchCV

# Tune k (1..99) for both weighting schemes with 10-fold CV, now on the
# held-out recording data. The candidate grid is loop-invariant, so it is
# built once -- the original rebuilt k_range/param_grid on every iteration.
k_range = list(range(1, 100))
param_grid = dict(n_neighbors=k_range)
for weights in ["uniform", "distance"]:
    knn = KNeighborsClassifier(weights=weights)
    # 10-fold cross-validated accuracy over all candidate k values.
    grid = GridSearchCV(knn, param_grid, cv=10, scoring='accuracy',
                        return_train_score=False, verbose=1)
    grid_search = grid.fit(X_train, y_train)
    print(f"{grid_search.best_params_}")
    accuracy = grid_search.best_score_ * 100
    print(f"Accuracy [{weights}] for our testing dataset with tuning is {accuracy}\n\n")
Fitting 10 folds for each of 99 candidates, totalling 990 fits {'n_neighbors': 1} Accuracy [uniform] for our testing dataset with tuning is 87.60416666666669 Fitting 10 folds for each of 99 candidates, totalling 990 fits {'n_neighbors': 1} Accuracy [distance] for our testing dataset with tuning is 87.60416666666669
Create KNN (K-Nearest Neighbours Classifier)
# Refit with the best setting from the second grid search. With k=1 the
# 'distance' weighting has no effect (the single nearest neighbour decides).
from sklearn.neighbors import KNeighborsClassifier
knn = KNeighborsClassifier(weights='distance', n_neighbors=1)
knn.fit(X_train, y_train)
KNeighborsClassifier(n_neighbors=1, weights='distance')In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
KNeighborsClassifier(n_neighbors=1, weights='distance')
knn.score(X_test, y_test)
0.8958333333333334
# Sanity check: this row of the stored test set is labelled running.
# Passing a DataFrame with the training column names (instead of a bare
# nested list) avoids sklearn's "X does not have valid feature names"
# warning and guards against silent column-order mistakes.
sample = pd.DataFrame([[-0.329969, -6.566999, 6.440067, -0.487907, -0.773408, -0.255150]],
                      columns=['ax', 'ay', 'az', 'gx', 'gy', 'gz'])
knn.predict(sample)
array(['running'], dtype=object)
from sklearn.metrics import confusion_matrix

# Confusion matrix of the k=1 KNN on this split's held-out 20%.
y_pred = knn.predict(X_test)
cm = confusion_matrix(y_test, y_pred)
print(cm)
# Plot the matrix for the same fitted `knn` whose counts were printed above.
# The original passed `grid` (the last grid-search object), so the printed
# and plotted matrices could come from different models.
ConfusionMatrixDisplay.from_estimator(knn, X_test, y_test);
[[166 4 2 2] [ 12 154 9 4] [ 5 4 163 14] [ 5 2 12 162]]
from sklearn.preprocessing import StandardScaler
# Creating StandardScaler Object
scaler = StandardScaler()
# print(scaler.fit(data))
# X_train = X_train.drop('class_', axis=1)
# Fit the scaler on the training split only, then apply the SAME transform to
# the test split (avoids leaking test-set statistics into the scaler).
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)
# NOTE: rebuilding the DataFrames here drops the original column names (ax..gz).
X_train = pd.DataFrame(X_train)
X_test = pd.DataFrame(X_test)
print(len(X_train))
print(len(X_test))
# Creating Classifier Model
model=KNeighborsClassifier(n_neighbors=1, weights='distance')
model.fit(X_train,y_train)
# Accuracy on the held-out testing data (the original comment said "Training Data").
score=model.score(X_test,y_test)
print("Accuracy for our testing dataset using Standard Scaler is : {:.3f}%".format(score*100) )
2880 720 Accuracy for our testing dataset using Standard Scaler is : 93.889%
The StandardScaler section above refers to the testing dataset for Q4
Answer 2:
I chose the KNN classifier. The confusion matrices on my training and testing data sets are not perfect, so I think I need to use a StandardScaler to achieve higher accuracy. This answer therefore continues in Answer 4.
Preprocess_the_data
Q3: Calculate the standard statistical values for each recording and each feature. Use (.describe()) for example. Explain the results.
# Q3: overall summary statistics (count/mean/std/quartiles) of the training set.
Total_data_used_statistical_values_training_data_store_describe = training_data_store.describe()
Total_data_used_statistical_values_training_data_store_describe
ax | ay | az | gx | gy | gz | |
---|---|---|---|---|---|---|
count | 17825.000000 | 17825.000000 | 17825.000000 | 17825.000000 | 17825.000000 | 17825.000000 |
mean | -2.986353 | -8.950275 | 2.074440 | -0.031268 | 0.095956 | 0.043261 |
std | 4.853279 | 7.784934 | 8.915541 | 1.941447 | 2.518209 | 1.561716 |
min | -58.355330 | -73.522530 | -65.680550 | -9.467172 | -24.216618 | -5.364095 |
25% | -6.024376 | -12.290586 | -1.960957 | -1.291544 | -1.093938 | -0.876417 |
50% | -3.116935 | -7.741853 | 1.317041 | -0.201044 | 0.033458 | -0.076044 |
75% | 0.355663 | -3.917728 | 5.650498 | 1.384884 | 1.021890 | 0.817460 |
max | 34.771310 | 19.047120 | 77.439890 | 15.596314 | 16.606546 | 10.029377 |
# Pairwise scatter/density plots of all six features, coloured by class (training data).
sns.pairplot(pd.concat(training_data(), ignore_index=True), hue="class_", height =2.5);
# Q3: overall summary statistics of the testing set, for comparison with training.
Total_data_used_statistical_values_testing_data_store_describe = testing_data_store.describe()
Total_data_used_statistical_values_testing_data_store_describe
ax | ay | az | gx | gy | gz | |
---|---|---|---|---|---|---|
count | 3600.000000 | 3600.000000 | 3600.000000 | 3600.000000 | 3600.000000 | 3600.000000 |
mean | -3.146436 | -8.909527 | 2.227296 | -0.012130 | 0.109259 | 0.060217 |
std | 4.588190 | 7.544367 | 8.876328 | 1.966366 | 2.119607 | 1.326979 |
min | -22.241808 | -72.455290 | -54.965710 | -8.880236 | -20.100677 | -4.693068 |
25% | -5.784240 | -11.836035 | -2.239646 | -1.277027 | -1.039195 | -0.803118 |
50% | -3.168965 | -7.640913 | 1.244525 | -0.272053 | 0.058032 | 0.021660 |
75% | -0.084896 | -4.115906 | 5.800447 | 1.431523 | 1.058581 | 0.862476 |
max | 17.217682 | 16.545313 | 75.672134 | 12.783716 | 10.861376 | 5.727054 |
# Pairwise feature plots for the testing data, coloured by class.
sns.pairplot(pd.concat(testing_data(), ignore_index=True), hue="class_", height =2.5);
Answer 3:
In my training_data(), the total count for ax, ay, az, gx, gy, gz is 17825. In my testing_data(), the total count for ax, ay, az, gx, gy, gz is 3600. For both data sets, the mean of az has the most positive value, because most of the time I only moved in the x and y directions. The std of az varies the most; since I did not use that direction much, the other data do not vary as widely. ay has the most negative min value, and az has the most positive max value.
Q4: Decide if you need to scale the data or not. Which scaler do you choose? What do you expect from the choice? Better or worse result? Motivate your answer.
Answer 4: The answers are shown at the lines "StandardScaler above refers to the training dataset for Q4" and "StandardScaler above refers to the testing dataset for Q4". I think scaling the data is needed, as my KNN score increased after scaling.
Q5: Transform the acceleration to magnitude and angle. Keep this as a separate dataframe, but include the rategyro sensorvalues in this one as well, for future evaluation, i.e. classification.
# acceleration
# Q5: transform the acceleration vector to magnitude (Euclidean norm) and angle.
d = pd.concat(training_data(), ignore_index=True)
a = np.sqrt((d['ax'] **2) + (d['ay'] **2) + (d['az'] **2))
a
norm_acceleration= d.assign(m_norm_acceleration=a)
# Acceleration angle measured from the z axis: arccos(az / |a|).
theta_acceleration = d.assign(my_theta_acceleration=np.arccos(d['az'] / norm_acceleration['m_norm_acceleration']))
class_ = d['class_']
# rategyro
# Same transform for the rate-gyro vector; note `a` is reused for the gyro norm.
a = np.sqrt((d['gx'] **2) + (d['gy'] **2) + (d['gz'] **2))
a
norm_rategyro = d.assign(m_norm_rategyro=a)
# NOTE(review): this angle uses gy while the acceleration angle uses az —
# confirm the reference axis is meant to differ between the two sensors.
theta_rategyro = d.assign(my_theta_rategyro=np.arccos(d['gy'] /norm_rategyro['m_norm_rategyro']))
# Separate dataframe holding class label, norms and angles.
# NOTE(review): Q5 asks to also include the raw rategyro sensor values
# (gx, gy, gz); only their norm/angle are kept here — verify intent.
df = pd.DataFrame({'class_' : class_,
'norm_acceleration': norm_acceleration['m_norm_acceleration'],
'theta_acceleration': theta_acceleration['my_theta_acceleration'],
'norm_rategyro': norm_rategyro['m_norm_rategyro'],
'theta_rategyro': theta_rategyro['my_theta_rategyro']
})
df
# df = pd.concat([df], axis=1).dropna().assign(class_=class_)
class_ | norm_acceleration | theta_acceleration | norm_rategyro | theta_rategyro | |
---|---|---|---|---|---|
0 | running | 12.524986 | 0.854982 | 0.736806 | 1.034788 |
1 | running | 16.974246 | 0.851129 | 0.798699 | 1.729806 |
2 | running | 19.580492 | 0.888151 | 1.098284 | 1.984415 |
3 | running | 18.745172 | 1.128675 | 1.348202 | 1.979977 |
4 | running | 16.608213 | 1.369480 | 1.401305 | 1.771483 |
... | ... | ... | ... | ... | ... |
17820 | bend_knee | 8.937076 | 1.248071 | 1.825042 | 0.997474 |
17821 | bend_knee | 8.177831 | 1.316379 | 2.076064 | 0.900969 |
17822 | bend_knee | 8.224111 | 1.386045 | 2.294436 | 0.859964 |
17823 | bend_knee | 8.435293 | 1.493477 | 2.380866 | 0.851294 |
17824 | bend_knee | 8.510011 | 1.678701 | 2.322783 | 0.924055 |
17825 rows × 5 columns
Answer 5: The answer is shown above.
Q6: Investigate if you can use some statistical measure for each class for classification purposes.
Q7:Examine the frequency the pattern of your movements repeats itself for the different movements. Could this be helpful for the classification?
training_data
# Display the full combined training table (17825 rows x 7 columns, see output).
training_data_store
ax | ay | az | gx | gy | gz | class_ | |
---|---|---|---|---|---|---|---|
0 | 3.536667 | -8.764157 | 8.219295 | -0.426663 | 0.376293 | 0.468237 | running |
1 | 3.972804 | -12.131095 | 11.188315 | -0.133465 | -0.126467 | 0.777247 | running |
2 | 3.418738 | -14.802985 | 12.352308 | 0.195966 | -0.441429 | 0.986390 | running |
3 | 1.820551 | -16.844654 | 8.020269 | 0.370743 | -0.536392 | 1.180035 | running |
4 | 0.771038 | -16.254522 | 3.320959 | -0.066549 | -0.279340 | 1.371567 | running |
... | ... | ... | ... | ... | ... | ... | ... |
17820 | -4.225421 | -7.347326 | 2.834419 | 1.315053 | 0.989951 | 0.788295 | bend_knee |
17821 | -3.888998 | -6.893213 | 2.058209 | 1.380154 | 1.288926 | 0.862489 | bend_knee |
17822 | -4.683547 | -6.589226 | 1.510789 | 1.444906 | 1.497039 | 0.967244 | bend_knee |
17823 | -5.238657 | -6.579218 | 0.651558 | 1.428796 | 1.569016 | 1.079469 | bend_knee |
17824 | -5.052723 | -6.786039 | -0.916491 | 1.325403 | 1.399684 | 1.295959 | bend_knee |
17825 rows × 7 columns
# Boolean mask of the training rows labelled 'running'; show them if any exist.
run = training_data_store['class_'].str.contains('running')
if run.any():
    print(training_data_store[run])
ax ay az gx gy gz class_ 0 3.536667 -8.764157 8.219295 -0.426663 0.376293 0.468237 running 1 3.972804 -12.131095 11.188315 -0.133465 -0.126467 0.777247 running 2 3.418738 -14.802985 12.352308 0.195966 -0.441429 0.986390 running 3 1.820551 -16.844654 8.020269 0.370743 -0.536392 1.180035 running 4 0.771038 -16.254522 3.320959 -0.066549 -0.279340 1.371567 running ... ... ... ... ... ... ... ... 4495 -6.606886 -4.424983 -4.107464 -0.418879 1.336381 0.219632 running 4496 -7.275557 -6.001708 -5.381811 -0.432911 1.277232 0.625369 running 4497 -7.173545 -6.372599 -6.186866 -0.282813 0.786166 1.166578 running 4498 -7.410753 -8.059927 -7.771310 -0.056985 -0.008063 1.433666 running 4499 -7.690779 -12.510910 -1.063169 1.048035 0.516007 2.052018 running [4500 rows x 7 columns]
# Q6: summary statistics of the 'running' rows in the training data.
run_statistical_values_training_data_store_describe = training_data_store[run].describe()
run_statistical_values_training_data_store_describe
ax | ay | az | gx | gy | gz | |
---|---|---|---|---|---|---|
count | 4500.000000 | 4500.000000 | 4500.000000 | 4500.000000 | 4500.000000 | 4500.000000 |
mean | -3.155088 | -10.370162 | 0.210956 | -0.033843 | 0.277159 | 0.078657 |
std | 4.865273 | 8.646869 | 7.915198 | 1.673507 | 3.341670 | 2.241965 |
min | -19.415750 | -37.804590 | -25.676895 | -4.784052 | -8.252998 | -4.036597 |
25% | -6.932648 | -15.377623 | -3.924741 | -1.100696 | -1.797942 | -1.491318 |
50% | -3.904732 | -9.199481 | -0.329322 | -0.296401 | -0.293163 | -0.582800 |
75% | 0.901872 | -3.546445 | 4.436671 | 1.062046 | 1.967501 | 1.272764 |
max | 16.223417 | 4.778022 | 69.153336 | 5.208708 | 12.455402 | 9.648895 |
# Q7: pairwise feature plots for the running class only (training data).
run_list = [training_data_store[run]]
sns.pairplot(pd.concat(run_list, ignore_index=True), height =2.5);
# Boolean mask of the training rows labelled 'jumping'; show them if any exist.
jump = training_data_store['class_'].str.contains('jumping')
if jump.any():
    print(training_data_store[jump])
ax ay az gx gy gz class_ 8825 3.241912 -13.474079 7.663859 0.448358 -0.223612 -0.345523 jumping 8826 3.226168 -11.907581 6.437711 0.616293 -0.367828 -0.525466 jumping 8827 2.664996 -10.304280 4.496675 0.660625 -0.486336 -0.569012 jumping 8828 2.882513 -8.580618 2.771164 0.488605 -0.823586 -0.571246 jumping 8829 3.687789 -8.448391 2.221647 -0.277926 -0.764507 -0.695077 jumping ... ... ... ... ... ... ... ... 13320 -4.133810 -14.072485 -12.496738 2.834519 2.962801 -1.359437 jumping 13321 -7.947897 -6.630090 -11.768182 2.187142 1.589594 -0.944904 jumping 13322 -9.582275 -2.473383 -2.501960 1.497248 0.307335 -0.489233 jumping 13323 -10.397558 -3.243473 5.110979 0.465863 -0.070232 -0.020804 jumping 13324 -9.043609 -4.898709 5.657096 -1.078474 0.561385 0.207205 jumping [4500 rows x 7 columns]
# Q6: summary statistics of the 'jumping' rows in the training data.
jump_statistical_values_training_data_store_describe = training_data_store[jump].describe()
jump_statistical_values_training_data_store_describe
ax | ay | az | gx | gy | gz | |
---|---|---|---|---|---|---|
count | 4500.000000 | 4500.000000 | 4500.000000 | 4500.000000 | 4500.000000 | 4500.000000 |
mean | -0.715596 | -9.828769 | 2.870194 | -0.007783 | 0.069305 | -0.019031 |
std | 5.679784 | 11.496961 | 14.182450 | 2.507901 | 3.248618 | 1.460084 |
min | -58.355330 | -73.522530 | -65.680550 | -9.467172 | -24.216618 | -5.364095 |
25% | -3.226496 | -18.099513 | -3.593546 | -1.587028 | -1.422661 | -0.747045 |
50% | 0.061258 | -6.441858 | 0.757861 | 0.103542 | 0.116780 | 0.011868 |
75% | 2.376928 | -0.340767 | 7.461750 | 1.564736 | 1.563222 | 0.737327 |
max | 34.771310 | 19.047120 | 77.439890 | 15.596314 | 16.606546 | 10.029377 |
# Q7: pairwise feature plots for the jumping class only (training data).
jump_list = [training_data_store[jump]]
sns.pairplot(pd.concat(jump_list, ignore_index=True), height =2.5);
# Boolean mask of the training rows labelled 'walking'; show them if any exist.
walk = training_data_store['class_'].str.contains('walking')
if walk.any():
    print(training_data_store[walk])
ax ay az gx gy gz class_ 4500 -2.006896 -7.116554 4.699732 0.041452 -0.587792 0.198252 walking 4501 -1.830023 -6.753287 4.192621 -0.004119 -0.716754 0.176837 walking 4502 -1.560215 -6.247737 3.953517 -0.093619 -0.899281 0.125611 walking 4503 -1.409974 -6.082172 4.246691 -0.150919 -0.966371 0.082659 walking 4504 -1.212864 -6.145609 5.386715 -0.207450 -0.927834 0.021939 walking ... ... ... ... ... ... ... ... 8820 2.219875 -2.280391 -5.598362 -0.786829 1.433788 0.173172 walking 8821 2.369082 -1.991162 -5.703477 -1.031978 0.561385 -0.354948 walking 8822 2.184450 -2.232325 -5.271151 -1.168830 0.010245 -0.711536 walking 8823 0.314665 -5.304507 -4.289749 -1.261542 -0.609382 -1.066256 walking 8824 -1.617743 -8.312736 -3.499221 -1.268436 -0.888058 -1.139299 walking [4325 rows x 7 columns]
# Q6: summary statistics of the 'walking' rows in the training data.
walk_statistical_values_training_data_store_describe = training_data_store[walk].describe()
walk_statistical_values_training_data_store_describe
ax | ay | az | gx | gy | gz | |
---|---|---|---|---|---|---|
count | 4325.000000 | 4325.000000 | 4325.000000 | 4325.000000 | 4325.000000 | 4325.000000 |
mean | -3.069930 | -9.333466 | 0.603748 | -0.072456 | 0.000396 | 0.078736 |
std | 3.914733 | 3.331278 | 3.941401 | 1.116711 | 1.371160 | 0.772815 |
min | -18.453377 | -24.694840 | -13.449025 | -2.630403 | -5.044455 | -2.700984 |
25% | -5.089412 | -11.313857 | -1.673116 | -0.836676 | -0.907466 | -0.486580 |
50% | -3.239116 | -9.008615 | -0.078214 | -0.282831 | 0.069290 | -0.074351 |
75% | -0.617560 | -7.067923 | 2.580815 | 0.446839 | 0.800635 | 0.556044 |
max | 10.113854 | 1.083788 | 33.589880 | 3.980537 | 4.611177 | 3.627894 |
# Q7: pairwise feature plots for the walking class only (training data).
walk_list = [training_data_store[walk]]
sns.pairplot(pd.concat(walk_list, ignore_index=True), height =2.5);
# Boolean mask of the training rows labelled 'bend_knee'; show them if any exist.
bend_knee = training_data_store['class_'].str.contains('bend_knee')
if bend_knee.any():
    print(training_data_store[bend_knee])
ax ay az gx gy gz class_ 13325 -4.387365 -6.716186 -0.015658 -1.090517 0.808576 -2.368394 bend_knee 13326 -2.745575 -5.280220 0.215947 -1.099680 0.861111 -2.340731 bend_knee 13327 -1.124988 -4.220719 -0.793802 -1.132387 0.795015 -2.374311 bend_knee 13328 2.272768 -3.300302 -0.082188 -1.190419 0.673819 -2.649776 bend_knee 13329 3.303864 -4.243952 -0.700687 -1.022431 0.564439 -3.205908 bend_knee ... ... ... ... ... ... ... ... 17820 -4.225421 -7.347326 2.834419 1.315053 0.989951 0.788295 bend_knee 17821 -3.888998 -6.893213 2.058209 1.380154 1.288926 0.862489 bend_knee 17822 -4.683547 -6.589226 1.510789 1.444906 1.497039 0.967244 bend_knee 17823 -5.238657 -6.579218 0.651558 1.428796 1.569016 1.079469 bend_knee 17824 -5.052723 -6.786039 -0.916491 1.325403 1.399684 1.295959 bend_knee [4500 rows x 7 columns]
# Q6: summary statistics of the 'bend_knee' rows in the training data.
bend_knee_statistical_values_training_data_store_describe = training_data_store[bend_knee].describe()
bend_knee_statistical_values_training_data_store_describe
ax | ay | az | gx | gy | gz | |
---|---|---|---|---|---|---|
count | 4500.000000 | 4500.000000 | 4500.000000 | 4500.000000 | 4500.000000 | 4500.000000 |
mean | -5.008048 | -6.283606 | 4.555668 | -0.012592 | 0.033249 | 0.036062 |
std | 3.657334 | 3.528068 | 4.882842 | 2.154415 | 1.244822 | 1.387055 |
min | -18.992897 | -26.365800 | -18.561280 | -3.821800 | -5.858616 | -4.259668 |
25% | -7.096464 | -8.348569 | 1.554194 | -2.084477 | -0.637879 | -1.048973 |
50% | -4.747654 | -5.709247 | 4.018442 | -0.122766 | 0.085556 | 0.080582 |
75% | -2.456784 | -3.797111 | 7.929246 | 2.004642 | 0.599124 | 0.978384 |
max | 5.217502 | 7.628606 | 28.570288 | 5.096135 | 9.463210 | 4.653065 |
# Q7: pairwise feature plots for the bend_knee class only (training data).
bend_knee_list = [training_data_store[bend_knee]]
sns.pairplot(pd.concat(bend_knee_list, ignore_index=True), height =2.5);
testing_data
# Boolean mask of the testing rows labelled 'running'; show them if any exist.
run = testing_data_store['class_'].str.contains('running')
if run.any():
    print(testing_data_store[run])
ax ay az gx gy gz class_ 0 -1.303614 -6.411490 4.077556 -0.421829 -0.733195 -0.274802 running 1 -0.998066 -6.549138 4.557804 -0.496127 -0.755675 -0.248011 running 2 -0.687327 -6.553696 4.994870 -0.511329 -0.774472 -0.243334 running 3 -0.329969 -6.566999 6.440067 -0.487907 -0.773408 -0.255150 running 4 -0.195836 -6.753056 6.938913 -0.338978 -0.745797 -0.273982 running .. ... ... ... ... ... ... ... 895 -1.142676 -7.028859 -0.988777 -0.586012 -1.856193 0.250106 running 896 -1.131136 -7.112148 -1.290695 -0.652561 -1.006078 0.268362 running 897 -0.377307 -7.173612 -1.347782 -0.657099 -0.327581 0.327912 running 898 0.466563 -6.896718 -1.469705 -0.629261 -0.020630 0.342940 running 899 1.515338 -6.184529 -1.835979 -0.574021 0.106744 0.319099 running [900 rows x 7 columns]
# Q6: summary statistics of the 'running' rows in the testing data.
run_statistical_values_testing_data_store_describe = testing_data_store[run].describe()
run_statistical_values_testing_data_store_describe
ax | ay | az | gx | gy | gz | |
---|---|---|---|---|---|---|
count | 900.000000 | 900.000000 | 900.000000 | 900.000000 | 900.000000 | 900.000000 |
mean | -3.654827 | -9.961343 | 0.087934 | -0.011950 | 0.219210 | 0.086860 |
std | 4.559622 | 6.618733 | 6.154609 | 1.299093 | 2.271544 | 1.603129 |
min | -16.515050 | -34.368008 | -16.662685 | -2.389984 | -5.328996 | -2.934509 |
25% | -7.165489 | -13.280773 | -3.640242 | -0.945310 | -1.437340 | -1.192771 |
50% | -3.646460 | -9.376551 | -0.404673 | -0.421733 | -0.168704 | -0.148091 |
75% | -0.051231 | -5.318389 | 3.578123 | 0.925064 | 1.441585 | 1.008888 |
max | 8.170855 | 1.344201 | 51.412820 | 3.732369 | 8.195647 | 5.386732 |
# Q7: pairwise feature plots for the running class only (testing data).
run_list = [testing_data_store[run]]
sns.pairplot(pd.concat(run_list, ignore_index=True), height =2.5);
# Boolean mask of the testing rows labelled 'jumping'; show them if any exist.
jump = testing_data_store['class_'].str.contains('jumping')
if jump.any():
    print(testing_data_store[jump])
ax ay az gx gy gz class_ 1800 -0.087044 -5.947197 0.115391 0.017401 0.993337 1.039029 jumping 1801 -3.963151 -5.011601 -0.823739 -0.028711 0.860831 1.331791 jumping 1802 -1.872601 -4.339836 -1.011885 -0.237976 0.032481 1.774040 jumping 1803 0.497735 -2.944035 2.831833 -0.426942 -0.241519 1.772679 jumping 1804 -0.522606 -2.340553 4.859693 -0.637359 0.036041 1.633907 jumping ... ... ... ... ... ... ... ... 2695 -7.338276 -9.363675 19.157799 1.549992 5.654972 1.061370 jumping 2696 -3.477166 -6.216439 12.904528 2.177909 6.611516 1.524720 jumping 2697 -1.564142 -4.503015 9.139798 2.528912 6.211801 1.693475 jumping 2698 -0.148220 -3.000577 5.078973 3.001844 4.924796 1.915062 jumping 2699 0.918569 -1.990214 2.149169 3.224478 3.867231 2.036049 jumping [900 rows x 7 columns]
# Q6: summary statistics of the 'jumping' rows in the testing data.
jump_statistical_values_testing_data_store_describe = testing_data_store[jump].describe()
jump_statistical_values_testing_data_store_describe
ax | ay | az | gx | gy | gz | |
---|---|---|---|---|---|---|
count | 900.000000 | 900.000000 | 900.000000 | 900.000000 | 900.000000 | 900.000000 |
mean | -0.118211 | -10.076424 | 3.211690 | -0.027394 | 0.227343 | -0.037803 |
std | 4.676360 | 12.195051 | 14.422964 | 2.664203 | 3.032247 | 1.476088 |
min | -22.241808 | -72.455290 | -54.965710 | -8.880236 | -20.100677 | -4.693068 |
25% | -2.868771 | -18.104933 | -3.596412 | -2.111949 | -1.435983 | -1.012736 |
50% | 0.277612 | -5.582153 | 1.155107 | 0.005236 | 0.118630 | 0.271704 |
75% | 2.275090 | -0.743720 | 7.574031 | 1.675451 | 1.705309 | 0.889394 |
max | 17.217682 | 16.545313 | 75.672134 | 12.783716 | 10.861376 | 5.727054 |
# Q7: pairwise feature plots for the jumping class only (testing data).
jump_list = [testing_data_store[jump]]
sns.pairplot(pd.concat(jump_list, ignore_index=True), height =2.5);
# Boolean mask of the testing rows labelled 'walking'; show them if any exist.
walk = testing_data_store['class_'].str.contains('walking')
if walk.any():
    print(testing_data_store[walk])
ax ay az gx gy gz class_ 900 -1.251526 -7.742667 6.135784 0.062518 0.024609 0.179402 walking 901 -1.362013 -7.610392 6.003968 0.128037 0.198933 0.235218 walking 902 -1.516372 -7.467812 5.857481 0.146189 0.468010 0.300825 walking 903 -1.752326 -7.158730 5.429475 0.087895 0.671707 0.332712 walking 904 -1.915112 -6.941700 4.959340 0.005288 0.715742 0.328907 walking ... ... ... ... ... ... ... ... 1795 -7.246837 -7.239175 -1.813177 -1.654450 -2.499696 0.044262 walking 1796 -6.397882 -7.754753 -1.809940 -1.834323 -2.472625 0.086446 walking 1797 -5.976033 -8.979453 -1.606672 -2.002189 -2.421958 0.079343 walking 1798 -5.786172 -9.977942 -1.058812 -2.072334 -2.397297 0.071279 walking 1799 -5.205358 -11.112898 0.078137 -2.086925 -2.370140 0.061820 walking [900 rows x 7 columns]
# Q6: summary statistics of the 'walking' rows in the testing data.
walk_statistical_values_testing_data_store_describe = testing_data_store[walk].describe()
walk_statistical_values_testing_data_store_describe
ax | ay | az | gx | gy | gz | |
---|---|---|---|---|---|---|
count | 900.000000 | 900.000000 | 900.000000 | 900.000000 | 900.000000 | 900.000000 |
mean | -4.602216 | -9.198835 | 0.104735 | -0.107253 | -0.025419 | 0.113245 |
std | 4.267751 | 3.828462 | 4.989880 | 1.278311 | 1.623782 | 0.832404 |
min | -21.761130 | -22.758114 | -13.299868 | -3.335586 | -5.616225 | -1.212149 |
25% | -6.763136 | -11.204503 | -2.415948 | -0.961659 | -1.058224 | -0.550900 |
50% | -4.378665 | -8.943023 | -0.729532 | -0.333410 | 0.164401 | -0.116667 |
75% | -2.072562 | -6.191834 | 1.551503 | 0.642700 | 1.051609 | 0.576757 |
max | 4.543543 | -0.317241 | 35.974224 | 3.923971 | 4.877096 | 2.659533 |
# Q7: pairwise feature plots for the walking class only (testing data).
walk_list = [testing_data_store[walk]]
sns.pairplot(pd.concat(walk_list, ignore_index=True), height =2.5);
# Boolean mask of the testing rows labelled 'bend_knee'; show them if any exist.
bend_knee = testing_data_store['class_'].str.contains('bend_knee')
if bend_knee.any():
    print(testing_data_store[bend_knee])
ax ay az gx gy gz class_ 2700 -1.225285 -6.566021 8.820689 2.282384 2.715191 0.595367 bend_knee 2701 -0.180054 -4.831317 8.226717 2.427700 2.934492 0.580252 bend_knee 2702 1.262855 -2.240925 4.933741 2.670563 2.550415 0.415580 bend_knee 2703 1.593897 -0.712763 -2.102032 2.518527 1.848199 0.211202 bend_knee 2704 1.437402 -0.495849 -6.156948 2.119825 1.633576 0.114947 bend_knee ... ... ... ... ... ... ... ... 3595 -9.171450 -3.374264 8.320090 0.772116 0.337809 0.808978 bend_knee 3596 -10.878668 -3.192045 8.108212 0.496721 0.239599 0.469790 bend_knee 3597 -10.963327 -3.435191 8.807771 0.264854 -0.078453 -0.017017 bend_knee 3598 -9.955962 -3.697164 9.846040 0.071157 -0.217381 -0.255795 bend_knee 3599 -9.054172 -3.656607 12.560192 -0.259478 -0.084963 -0.418268 bend_knee [900 rows x 7 columns]
# Q6: summary statistics of the 'bend_knee' rows in the testing data.
bend_knee_statistical_values_testing_data_store_describe = testing_data_store[bend_knee].describe()
bend_knee_statistical_values_testing_data_store_describe
ax | ay | az | gx | gy | gz | |
---|---|---|---|---|---|---|
count | 900.000000 | 900.000000 | 900.000000 | 900.000000 | 900.000000 | 900.000000 |
mean | -4.210491 | -6.401505 | 5.504825 | 0.098077 | 0.015901 | 0.078566 |
std | 3.272579 | 3.438623 | 4.878855 | 2.244579 | 0.970507 | 1.262589 |
min | -16.586464 | -20.463634 | -10.618936 | -3.989159 | -2.868885 | -2.793888 |
25% | -5.946213 | -8.598337 | 2.395557 | -2.130388 | -0.493330 | -0.896427 |
50% | -3.780871 | -6.028432 | 4.934819 | 0.230733 | 0.043738 | 0.126196 |
75% | -1.946259 | -3.965957 | 8.894357 | 2.199346 | 0.479150 | 0.929353 |
max | 3.793411 | 5.208566 | 18.733679 | 5.356031 | 4.152627 | 3.286909 |
# Q7: pairwise feature plots for the bend_knee class only (testing data).
bend_knee_list = [testing_data_store[bend_knee]]
sns.pairplot(pd.concat(bend_knee_list, ignore_index=True), height =2.5);
I had to answer questions 6 and 7 together because the code that plots the pairplots must come directly after the code using the (.describe()) method.
Answer 6:
I can measure each of the classes with statistical measures. I separated each class from the training data set and the testing data set. Then I used the (.describe) method to show the statistical measures.
Training_data In run class, total count is 4500, mean of ay has the most negative value, std of ay varies a lot, ay has the most negative value in min. az has the most positive value in max.
In jump class, total count is 4500, mean of ay has the most negative value, std of az varies a lot, az has the most negative value in min; az has the most positive value in max.
In walk class, total count is 4325, mean of ay has the most negative value, std of ax and az have the varies a lot, ay has the most negative value in min; az has the most positive value in max.
In bend_knee class, total count is 4500, mean of ay has the most negative value, std of az varies a lot, ay has the most negative value in min; az has the most positive value in max.
Testing_data In run class, total count is 3600, mean of ay has the most negative value, std of az varies a lot, ay has the most negative value in min. az has the most positive value in max.
In jump class, total count is 900, mean of ay has the most negative value, std of az varies a lot, az has the most negative value in min; az has the most positive value in max.
In walk class, total count is 900, mean of ay has the most negative value, std of az varies a lot, ay has the most negative value in min; az has the most positive value in max.
In bend_knee class, total count is 900, mean of ay has the most negative value, std of az varies a lot, ay has the most negative value in min; az has the most positive value in max
Answer 7: I used the pairplot for each class from the training and testing data sets. I think it can be helpful for observing the changes in the patterns of the different movement classes.
ax, ay, az, gx, gy, gz describe_training_data
# Side-by-side describe() of feature ax: whole training set vs each class.
ax_describe_training_data = pd.DataFrame({ "Total": Total_data_used_statistical_values_training_data_store_describe['ax'],
'walk': walk_statistical_values_training_data_store_describe['ax'],
'run': run_statistical_values_training_data_store_describe['ax'],
'jump': jump_statistical_values_training_data_store_describe['ax'],
'bend_knee': bend_knee_statistical_values_training_data_store_describe['ax']
})
ax_describe_training_data
Total | walk | run | jump | bend_knee | |
---|---|---|---|---|---|
count | 17825.000000 | 4325.000000 | 4500.000000 | 4500.000000 | 4500.000000 |
mean | -2.986353 | -3.069930 | -3.155088 | -0.715596 | -5.008048 |
std | 4.853279 | 3.914733 | 4.865273 | 5.679784 | 3.657334 |
min | -58.355330 | -18.453377 | -19.415750 | -58.355330 | -18.992897 |
25% | -6.024376 | -5.089412 | -6.932648 | -3.226496 | -7.096464 |
50% | -3.116935 | -3.239116 | -3.904732 | 0.061258 | -4.747654 |
75% | 0.355663 | -0.617560 | 0.901872 | 2.376928 | -2.456784 |
max | 34.771310 | 10.113854 | 16.223417 | 34.771310 | 5.217502 |
# Side-by-side describe() of feature ay: whole training set vs each class.
ay_describe_training_data = pd.DataFrame({ "Total": Total_data_used_statistical_values_training_data_store_describe['ay'],
'walk': walk_statistical_values_training_data_store_describe['ay'],
'run': run_statistical_values_training_data_store_describe['ay'],
'jump': jump_statistical_values_training_data_store_describe['ay'],
'bend_knee': bend_knee_statistical_values_training_data_store_describe['ay']
})
ay_describe_training_data
Total | walk | run | jump | bend_knee | |
---|---|---|---|---|---|
count | 17825.000000 | 4325.000000 | 4500.000000 | 4500.000000 | 4500.000000 |
mean | -8.950275 | -9.333466 | -10.370162 | -9.828769 | -6.283606 |
std | 7.784934 | 3.331278 | 8.646869 | 11.496961 | 3.528068 |
min | -73.522530 | -24.694840 | -37.804590 | -73.522530 | -26.365800 |
25% | -12.290586 | -11.313857 | -15.377623 | -18.099513 | -8.348569 |
50% | -7.741853 | -9.008615 | -9.199481 | -6.441858 | -5.709247 |
75% | -3.917728 | -7.067923 | -3.546445 | -0.340767 | -3.797111 |
max | 19.047120 | 1.083788 | 4.778022 | 19.047120 | 7.628606 |
# Side-by-side describe() of feature az: whole training set vs each class.
az_describe_training_data = pd.DataFrame({ "Total": Total_data_used_statistical_values_training_data_store_describe['az'],
'walk': walk_statistical_values_training_data_store_describe['az'],
'run': run_statistical_values_training_data_store_describe['az'],
'jump': jump_statistical_values_training_data_store_describe['az'],
'bend_knee': bend_knee_statistical_values_training_data_store_describe['az']
})
az_describe_training_data
Total | walk | run | jump | bend_knee | |
---|---|---|---|---|---|
count | 17825.000000 | 4325.000000 | 4500.000000 | 4500.000000 | 4500.000000 |
mean | 2.074440 | 0.603748 | 0.210956 | 2.870194 | 4.555668 |
std | 8.915541 | 3.941401 | 7.915198 | 14.182450 | 4.882842 |
min | -65.680550 | -13.449025 | -25.676895 | -65.680550 | -18.561280 |
25% | -1.960957 | -1.673116 | -3.924741 | -3.593546 | 1.554194 |
50% | 1.317041 | -0.078214 | -0.329322 | 0.757861 | 4.018442 |
75% | 5.650498 | 2.580815 | 4.436671 | 7.461750 | 7.929246 |
max | 77.439890 | 33.589880 | 69.153336 | 77.439890 | 28.570288 |
# Side-by-side describe() of feature gx: whole training set vs each class.
gx_describe_training_data = pd.DataFrame({ "Total": Total_data_used_statistical_values_training_data_store_describe['gx'],
'walk': walk_statistical_values_training_data_store_describe['gx'],
'run': run_statistical_values_training_data_store_describe['gx'],
'jump': jump_statistical_values_training_data_store_describe['gx'],
'bend_knee': bend_knee_statistical_values_training_data_store_describe['gx']
})
gx_describe_training_data
Total | walk | run | jump | bend_knee | |
---|---|---|---|---|---|
count | 17825.000000 | 4325.000000 | 4500.000000 | 4500.000000 | 4500.000000 |
mean | -0.031268 | -0.072456 | -0.033843 | -0.007783 | -0.012592 |
std | 1.941447 | 1.116711 | 1.673507 | 2.507901 | 2.154415 |
min | -9.467172 | -2.630403 | -4.784052 | -9.467172 | -3.821800 |
25% | -1.291544 | -0.836676 | -1.100696 | -1.587028 | -2.084477 |
50% | -0.201044 | -0.282831 | -0.296401 | 0.103542 | -0.122766 |
75% | 1.384884 | 0.446839 | 1.062046 | 1.564736 | 2.004642 |
max | 15.596314 | 3.980537 | 5.208708 | 15.596314 | 5.096135 |
# Side-by-side describe() of feature gy: whole training set vs each class.
gy_describe_training_data = pd.DataFrame({ "Total": Total_data_used_statistical_values_training_data_store_describe['gy'],
'walk': walk_statistical_values_training_data_store_describe['gy'],
'run': run_statistical_values_training_data_store_describe['gy'],
'jump': jump_statistical_values_training_data_store_describe['gy'],
'bend_knee': bend_knee_statistical_values_training_data_store_describe['gy']
})
gy_describe_training_data
Total | walk | run | jump | bend_knee | |
---|---|---|---|---|---|
count | 17825.000000 | 4325.000000 | 4500.000000 | 4500.000000 | 4500.000000 |
mean | 0.095956 | 0.000396 | 0.277159 | 0.069305 | 0.033249 |
std | 2.518209 | 1.371160 | 3.341670 | 3.248618 | 1.244822 |
min | -24.216618 | -5.044455 | -8.252998 | -24.216618 | -5.858616 |
25% | -1.093938 | -0.907466 | -1.797942 | -1.422661 | -0.637879 |
50% | 0.033458 | 0.069290 | -0.293163 | 0.116780 | 0.085556 |
75% | 1.021890 | 0.800635 | 1.967501 | 1.563222 | 0.599124 |
max | 16.606546 | 4.611177 | 12.455402 | 16.606546 | 9.463210 |
# Side-by-side describe() of feature gz: whole training set vs each class.
gz_describe_training_data= pd.DataFrame({ "Total": Total_data_used_statistical_values_training_data_store_describe['gz'],
'walk': walk_statistical_values_training_data_store_describe['gz'],
'run': run_statistical_values_training_data_store_describe['gz'],
'jump': jump_statistical_values_training_data_store_describe['gz'],
'bend_knee': bend_knee_statistical_values_training_data_store_describe['gz']
})
gz_describe_training_data
Total | walk | run | jump | bend_knee | |
---|---|---|---|---|---|
count | 17825.000000 | 4325.000000 | 4500.000000 | 4500.000000 | 4500.000000 |
mean | 0.043261 | 0.078736 | 0.078657 | -0.019031 | 0.036062 |
std | 1.561716 | 0.772815 | 2.241965 | 1.460084 | 1.387055 |
min | -5.364095 | -2.700984 | -4.036597 | -5.364095 | -4.259668 |
25% | -0.876417 | -0.486580 | -1.491318 | -0.747045 | -1.048973 |
50% | -0.076044 | -0.074351 | -0.582800 | 0.011868 | 0.080582 |
75% | 0.817460 | 0.556044 | 1.272764 | 0.737327 | 0.978384 |
max | 10.029377 | 3.627894 | 9.648895 | 10.029377 | 4.653065 |
ax, ay, az, gx, gy, gz describe_testing_data
# Side-by-side describe() of feature ax: whole testing set vs each class.
ax_describe_testing_data = pd.DataFrame({ "Total": Total_data_used_statistical_values_testing_data_store_describe['ax'],
'walk': walk_statistical_values_testing_data_store_describe['ax'],
'run': run_statistical_values_testing_data_store_describe['ax'],
'jump': jump_statistical_values_testing_data_store_describe['ax'],
'bend_knee': bend_knee_statistical_values_testing_data_store_describe['ax']
})
ax_describe_testing_data
Total | walk | run | jump | bend_knee | |
---|---|---|---|---|---|
count | 3600.000000 | 900.000000 | 900.000000 | 900.000000 | 900.000000 |
mean | -3.146436 | -4.602216 | -3.654827 | -0.118211 | -4.210491 |
std | 4.588190 | 4.267751 | 4.559622 | 4.676360 | 3.272579 |
min | -22.241808 | -21.761130 | -16.515050 | -22.241808 | -16.586464 |
25% | -5.784240 | -6.763136 | -7.165489 | -2.868771 | -5.946213 |
50% | -3.168965 | -4.378665 | -3.646460 | 0.277612 | -3.780871 |
75% | -0.084896 | -2.072562 | -0.051231 | 2.275090 | -1.946259 |
max | 17.217682 | 4.543543 | 8.170855 | 17.217682 | 3.793411 |
# Side-by-side describe() of feature ay: whole testing set vs each class.
ay_describe_testing_data = pd.DataFrame({ "Total": Total_data_used_statistical_values_testing_data_store_describe['ay'],
'walk': walk_statistical_values_testing_data_store_describe['ay'],
'run': run_statistical_values_testing_data_store_describe['ay'],
'jump': jump_statistical_values_testing_data_store_describe['ay'],
'bend_knee': bend_knee_statistical_values_testing_data_store_describe['ay']
})
ay_describe_testing_data
Total | walk | run | jump | bend_knee | |
---|---|---|---|---|---|
count | 3600.000000 | 900.000000 | 900.000000 | 900.000000 | 900.000000 |
mean | -8.909527 | -9.198835 | -9.961343 | -10.076424 | -6.401505 |
std | 7.544367 | 3.828462 | 6.618733 | 12.195051 | 3.438623 |
min | -72.455290 | -22.758114 | -34.368008 | -72.455290 | -20.463634 |
25% | -11.836035 | -11.204503 | -13.280773 | -18.104933 | -8.598337 |
50% | -7.640913 | -8.943023 | -9.376551 | -5.582153 | -6.028432 |
75% | -4.115906 | -6.191834 | -5.318389 | -0.743720 | -3.965957 |
max | 16.545313 | -0.317241 | 1.344201 | 16.545313 | 5.208566 |
# Side-by-side describe() summary of the az channel: full testing set vs each class.
_az_sources = [
    ("Total", Total_data_used_statistical_values_testing_data_store_describe),
    ("walk", walk_statistical_values_testing_data_store_describe),
    ("run", run_statistical_values_testing_data_store_describe),
    ("jump", jump_statistical_values_testing_data_store_describe),
    ("bend_knee", bend_knee_statistical_values_testing_data_store_describe),
]
az_describe_testing_data = pd.DataFrame({name: tbl['az'] for name, tbl in _az_sources})
az_describe_testing_data
Total | walk | run | jump | bend_knee | |
---|---|---|---|---|---|
count | 3600.000000 | 900.000000 | 900.000000 | 900.000000 | 900.000000 |
mean | 2.227296 | 0.104735 | 0.087934 | 3.211690 | 5.504825 |
std | 8.876328 | 4.989880 | 6.154609 | 14.422964 | 4.878855 |
min | -54.965710 | -13.299868 | -16.662685 | -54.965710 | -10.618936 |
25% | -2.239646 | -2.415948 | -3.640242 | -3.596412 | 2.395557 |
50% | 1.244525 | -0.729532 | -0.404673 | 1.155107 | 4.934819 |
75% | 5.800447 | 1.551503 | 3.578123 | 7.574031 | 8.894357 |
max | 75.672134 | 35.974224 | 51.412820 | 75.672134 | 18.733679 |
# Side-by-side describe() summary of the gx channel: full testing set vs each class.
_gx_sources = [
    ("Total", Total_data_used_statistical_values_testing_data_store_describe),
    ("walk", walk_statistical_values_testing_data_store_describe),
    ("run", run_statistical_values_testing_data_store_describe),
    ("jump", jump_statistical_values_testing_data_store_describe),
    ("bend_knee", bend_knee_statistical_values_testing_data_store_describe),
]
gx_describe_testing_data = pd.DataFrame({name: tbl['gx'] for name, tbl in _gx_sources})
gx_describe_testing_data
Total | walk | run | jump | bend_knee | |
---|---|---|---|---|---|
count | 3600.000000 | 900.000000 | 900.000000 | 900.000000 | 900.000000 |
mean | -0.012130 | -0.107253 | -0.011950 | -0.027394 | 0.098077 |
std | 1.966366 | 1.278311 | 1.299093 | 2.664203 | 2.244579 |
min | -8.880236 | -3.335586 | -2.389984 | -8.880236 | -3.989159 |
25% | -1.277027 | -0.961659 | -0.945310 | -2.111949 | -2.130388 |
50% | -0.272053 | -0.333410 | -0.421733 | 0.005236 | 0.230733 |
75% | 1.431523 | 0.642700 | 0.925064 | 1.675451 | 2.199346 |
max | 12.783716 | 3.923971 | 3.732369 | 12.783716 | 5.356031 |
# Side-by-side describe() summary of the gy channel: full testing set vs each class.
_gy_sources = [
    ("Total", Total_data_used_statistical_values_testing_data_store_describe),
    ("walk", walk_statistical_values_testing_data_store_describe),
    ("run", run_statistical_values_testing_data_store_describe),
    ("jump", jump_statistical_values_testing_data_store_describe),
    ("bend_knee", bend_knee_statistical_values_testing_data_store_describe),
]
gy_describe_testing_data = pd.DataFrame({name: tbl['gy'] for name, tbl in _gy_sources})
gy_describe_testing_data
Total | walk | run | jump | bend_knee | |
---|---|---|---|---|---|
count | 3600.000000 | 900.000000 | 900.000000 | 900.000000 | 900.000000 |
mean | 0.109259 | -0.025419 | 0.219210 | 0.227343 | 0.015901 |
std | 2.119607 | 1.623782 | 2.271544 | 3.032247 | 0.970507 |
min | -20.100677 | -5.616225 | -5.328996 | -20.100677 | -2.868885 |
25% | -1.039195 | -1.058224 | -1.437340 | -1.435983 | -0.493330 |
50% | 0.058032 | 0.164401 | -0.168704 | 0.118630 | 0.043738 |
75% | 1.058581 | 1.051609 | 1.441585 | 1.705309 | 0.479150 |
max | 10.861376 | 4.877096 | 8.195647 | 10.861376 | 4.152627 |
# Side-by-side describe() summary of the gz channel: full testing set vs each class.
_gz_sources = [
    ("Total", Total_data_used_statistical_values_testing_data_store_describe),
    ("walk", walk_statistical_values_testing_data_store_describe),
    ("run", run_statistical_values_testing_data_store_describe),
    ("jump", jump_statistical_values_testing_data_store_describe),
    ("bend_knee", bend_knee_statistical_values_testing_data_store_describe),
]
gz_describe_testing_data = pd.DataFrame({name: tbl['gz'] for name, tbl in _gz_sources})
gz_describe_testing_data
Total | walk | run | jump | bend_knee | |
---|---|---|---|---|---|
count | 3600.000000 | 900.000000 | 900.000000 | 900.000000 | 900.000000 |
mean | 0.060217 | 0.113245 | 0.086860 | -0.037803 | 0.078566 |
std | 1.326979 | 0.832404 | 1.603129 | 1.476088 | 1.262589 |
min | -4.693068 | -1.212149 | -2.934509 | -4.693068 | -2.793888 |
25% | -0.803118 | -0.550900 | -1.192771 | -1.012736 | -0.896427 |
50% | 0.021660 | -0.116667 | -0.148091 | 0.271704 | 0.126196 |
75% | 0.862476 | 0.576757 | 1.008888 | 0.889394 | 0.929353 |
max | 5.727054 | 2.659533 | 5.386732 | 5.727054 | 3.286909 |
Correlation
¶
Q8: Calculate the correlation matrix between all features. From this result, discuss if all features are useful for the classification
correlation matrix
# Q8: pairwise Pearson correlation between the six sensor channels of the
# training set. The re-imports are redundant with the top of the notebook but
# keep this cell runnable on its own.
import seaborn as sns
import matplotlib.pyplot as plt
training_data_store.corr()
ax | ay | az | gx | gy | gz | |
---|---|---|---|---|---|---|
ax | 1.000000 | 0.115509 | -0.285243 | -0.025989 | -0.079320 | -0.101888 |
ay | 0.115509 | 1.000000 | -0.116625 | 0.103438 | -0.232466 | -0.229022 |
az | -0.285243 | -0.116625 | 1.000000 | 0.061195 | -0.118061 | -0.018629 |
gx | -0.025989 | 0.103438 | 0.061195 | 1.000000 | 0.029655 | 0.476902 |
gy | -0.079320 | -0.232466 | -0.118061 | 0.029655 | 1.000000 | 0.300157 |
gz | -0.101888 | -0.229022 | -0.018629 | 0.476902 | 0.300157 | 1.000000 |
# Visualise the training-set correlation matrix as an annotated heatmap.
train_corr = training_data_store.corr()
sns.heatmap(train_corr, annot=True)
plt.show()
# Same pairwise Pearson correlation computed on the testing set.
testing_data_store.corr()
ax | ay | az | gx | gy | gz | |
---|---|---|---|---|---|---|
ax | 1.000000 | 0.139210 | -0.327466 | -0.107332 | -0.094760 | -0.156551 |
ay | 0.139210 | 1.000000 | -0.177498 | 0.126681 | -0.107738 | 0.005062 |
az | -0.327466 | -0.177498 | 1.000000 | 0.088376 | -0.077080 | -0.001062 |
gx | -0.107332 | 0.126681 | 0.088376 | 1.000000 | 0.067455 | 0.618611 |
gy | -0.094760 | -0.107738 | -0.077080 | 0.067455 | 1.000000 | 0.156800 |
gz | -0.156551 | 0.005062 | -0.001062 | 0.618611 | 0.156800 | 1.000000 |
# Visualise the testing-set correlation matrix as an annotated heatmap.
test_corr = testing_data_store.corr()
sns.heatmap(test_corr, annot=True)
plt.show()
Answer 8: No pair of features is strongly correlated — the largest off-diagonal value is between gx and gz (about 0.48 on training data, 0.62 on testing data), and all other pairs are well below that. Each of the six channels (acceleration and angular rate in the x, y, z directions) therefore carries largely independent information, so all features are kept for classification.
Q9: Calculate the autocorrelation for all classes. Do you see a pattern? Explain what you see.
autocorrelation plotfunction
# training_data_store.drop(['class_'], axis='columns')
# Per-class feature frames (class_ label dropped) for the autocorrelation plots.
# NOTE(review): the four "training_data_store_*" frames below are sliced from
# testing_data_store, not training_data_store — this looks like a copy/paste
# slip, so the "training" autocorrelation plots in Q9 are actually drawn from
# testing data. Confirm what the bend_knee/jump/walk/run selectors are defined
# on before switching the source frame.
training_data_store_bend_knee = testing_data_store[bend_knee].drop(['class_'], axis='columns')
training_data_store_jump = testing_data_store[jump].drop(['class_'], axis='columns')
training_data_store_walk = testing_data_store[walk].drop(['class_'], axis='columns')
training_data_store_run = testing_data_store[run].drop(['class_'], axis='columns')
testing_data_store_bend_knee = testing_data_store[bend_knee].drop(['class_'], axis='columns')
testing_data_store_jump = testing_data_store[jump].drop(['class_'], axis='columns')
testing_data_store_walk = testing_data_store[walk].drop(['class_'], axis='columns')
testing_data_store_run = testing_data_store[run].drop(['class_'], axis='columns')
# Q9: autocorrelation plot for every class, training frames first, then testing.
# The original cell repeated the same three lines eight times and also called
# x.plot() on the Axes returned by autocorrelation_plot, which only added an
# empty artist to the figure; a single helper in a loop fixes both.
def _show_autocorrelation(frame, title):
    """Draw the autocorrelation of every column of *frame* on one figure."""
    ax = pd.plotting.autocorrelation_plot(frame)
    ax.set_title(title)
    plt.show()

for _name, _frame in [
    ("training run", training_data_store_run),
    ("training walk", training_data_store_walk),
    ("training jump", training_data_store_jump),
    ("training bend_knee", training_data_store_bend_knee),
    ("testing run", testing_data_store_run),
    ("testing walk", testing_data_store_walk),
    ("testing jump", testing_data_store_jump),
    ("testing bend_knee", testing_data_store_bend_knee),
]:
    _show_autocorrelation(_frame, _name)
Answer 9: All of the autocorrelation curves show an oscillating (zigzag) pattern whose amplitude decays toward zero as the lag increases. The oscillation reflects the periodic, step-like nature of each activity, and the decay shows that samples far apart in time become progressively less correlated.
training_data_store_run is 0.75ms
training_data_store_walk is 0.95ms
training_data_store_jump is 0.65ms
training_data_store_bend_knee is 1.0ms
testing_data_store_run is 0.75ms
testing_data_store_walk is 0.95ms
testing_data_store_jump is 0.65ms
testing_data_store_bend_knee is 1.0ms
Classification
¶Q10: You need to maximize the performance for each classifier by changing values of relevant hyperparameters.
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.preprocessing import OneHotEncoder
from scipy.special import softmax
# Shared one-hot encoder used by gradient_descent below.
# NOTE(review): `sparse=False` was renamed `sparse_output=False` in
# scikit-learn 1.2+ — confirm the installed version if this warns or errors.
onehot_encoder = OneHotEncoder(sparse=False)
def loss(X, Y, W):
    """Average multinomial-logistic (softmax) loss over the N samples.

    X: (N, d) design matrix; Y: (N, K) one-hot labels; W: (d, K) weights.
    Uses logits Z = -X @ W, matching the convention in gradient()/predict().
    """
    logits = -X @ W
    n_samples = X.shape[0]
    # Cross-entropy written via the trace identity plus the log-partition term.
    log_partition = np.log(np.exp(logits).sum(axis=1)).sum()
    return (np.trace(X @ W @ Y.T) + log_partition) / n_samples
def gradient(X, Y, W, mu):
    """Descent direction for the softmax loss with L2 penalty of strength mu.

    Y is one-hot encoded. With logits Z = -X @ W, the update used by
    gradient_descent (W -= eta * gradient(...)) decreases the loss.
    """
    probs = softmax(-X @ W, axis=1)
    n_samples = X.shape[0]
    return (X.T @ (Y - probs)) / n_samples + 2 * mu * W
def gradient_descent(X, Y, max_iter=1000, eta=0.1, mu=0.01):
    """
    Very basic gradient descent with fixed step size eta and L2 strength mu.

    X: (N, d) feature matrix; Y: (N,) integer class labels (one-hot encoded
    internally via the module-level onehot_encoder, categories sorted).
    Returns (df, W): df holds the loss after each step, W the final weights.
    """
    Y_onehot = onehot_encoder.fit_transform(Y.reshape(-1, 1))
    W = np.zeros((X.shape[1], Y_onehot.shape[1]))
    step = 0
    step_1st = []
    loss_lst = []
    W_lst = []
    while step < max_iter:
        step += 1
        W -= eta * gradient(X, Y_onehot, W, mu)
        step_1st.append(step)
        # Bug fix: the original appended W itself, so every element of W_lst
        # aliased the same array and ended up equal to the final W after the
        # in-place updates; append a snapshot instead.
        W_lst.append(W.copy())
        loss_lst.append(loss(X, Y_onehot, W))
    df = pd.DataFrame({
        'step': step_1st,
        'loss': loss_lst
    })
    return df, W
class Multiclass:
    """Softmax (multinomial logistic) classifier trained by gradient descent."""

    def fit(self, X, Y):
        # All optimisation is delegated to gradient_descent; the per-step loss
        # trace is kept so it can be plotted afterwards.
        self.loss_steps, self.W = gradient_descent(X, Y)

    def loss_plot(self):
        """Plot the training-loss curve recorded by fit()."""
        return self.loss_steps.plot(x='step', y='loss', xlabel='step', ylabel='loss')

    def predict(self, H):
        """Return the most probable class index for every row of H."""
        probs = softmax(-H @ self.W, axis=1)
        return probs.argmax(axis=1)
# Inspect the testing frame: 3600 rows with six float sensor columns plus the
# string class_ label (see the info() output below).
testing_data_store
testing_data_store.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 3600 entries, 0 to 3599 Data columns (total 7 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 ax 3600 non-null float64 1 ay 3600 non-null float64 2 az 3600 non-null float64 3 gx 3600 non-null float64 4 gy 3600 non-null float64 5 gz 3600 non-null float64 6 class_ 3600 non-null object dtypes: float64(6), object(1) memory usage: 197.0+ KB
# class_ is still a plain string column at this point (prints True below).
pd.api.types.is_string_dtype(testing_data_store['class_'])
True
# Print the name of every string-typed column in the testing frame
# (only class_ qualifies).
for col_name, col_values in testing_data_store.items():
    if pd.api.types.is_string_dtype(col_values):
        print(col_name)
class_
# Convert every string column of the testing frame to an ordered categorical,
# then display its integer codes.
for col_name, col_values in testing_data_store.items():
    if pd.api.types.is_string_dtype(col_values):
        testing_data_store[col_name] = col_values.astype('category').cat.as_ordered()
testing_data_store['class_'].cat.codes
0 2 1 2 2 2 3 2 4 2 .. 3595 0 3596 0 3597 0 3598 0 3599 0 Length: 3600, dtype: int8
# class_ is now an ordered categorical (bend_knee < jumping < running < walking).
testing_data_store['class_']
0 running 1 running 2 running 3 running 4 running ... 3595 bend_knee 3596 bend_knee 3597 bend_knee 3598 bend_knee 3599 bend_knee Name: class_, Length: 3600, dtype: category Categories (4, object): ['bend_knee' < 'jumping' < 'running' < 'walking']
# Replace the categorical labels with their integer codes for the classifiers.
testing_data_store['class_'] = testing_data_store['class_'].cat.codes
testing_data_store
ax | ay | az | gx | gy | gz | class_ | |
---|---|---|---|---|---|---|---|
0 | -1.303614 | -6.411490 | 4.077556 | -0.421829 | -0.733195 | -0.274802 | 2 |
1 | -0.998066 | -6.549138 | 4.557804 | -0.496127 | -0.755675 | -0.248011 | 2 |
2 | -0.687327 | -6.553696 | 4.994870 | -0.511329 | -0.774472 | -0.243334 | 2 |
3 | -0.329969 | -6.566999 | 6.440067 | -0.487907 | -0.773408 | -0.255150 | 2 |
4 | -0.195836 | -6.753056 | 6.938913 | -0.338978 | -0.745797 | -0.273982 | 2 |
... | ... | ... | ... | ... | ... | ... | ... |
3595 | -9.171450 | -3.374264 | 8.320090 | 0.772116 | 0.337809 | 0.808978 | 0 |
3596 | -10.878668 | -3.192045 | 8.108212 | 0.496721 | 0.239599 | 0.469790 | 0 |
3597 | -10.963327 | -3.435191 | 8.807771 | 0.264854 | -0.078453 | -0.017017 | 0 |
3598 | -9.955962 | -3.697164 | 9.846040 | 0.071157 | -0.217381 | -0.255795 | 0 |
3599 | -9.054172 | -3.656607 | 12.560192 | -0.259478 | -0.084963 | -0.418268 | 0 |
3600 rows × 7 columns
# Sanity check: 3600 samples x 7 columns (6 features + class_).
testing_data_store.shape
(3600, 7)
# Build a cleaned working copy of the testing frame. The original used
# pd.concat([df], axis=1).dropna().assign() — a single-frame concat plus an
# empty assign(), i.e. an obfuscated way to copy the frame; simplified here.
new_df = testing_data_store.dropna().copy()
X = new_df.drop('class_', axis='columns').values
y = new_df['class_'].values
# Fit the softmax classifier on the full frame and show its loss curve.
model = Multiclass()
model.fit(X, y)
model.loss_plot();
model.predict(X)
array([1, 1, 1, ..., 0, 0, 0], dtype=int64)
y
array([2, 2, 2, ..., 0, 0, 0], dtype=int8)
model.predict(X) == y
array([False, False, False, ..., True, True, True])
# Resubstitution accuracy: the model is evaluated on the same rows it was fit
# on, so this is not a held-out estimate.
score = np.mean(model.predict(X) == y)
print(f'Softmax Score for testing data: {score}')
Softmax Score for testing data: 0.4363888888888889
from sklearn.tree import DecisionTreeClassifier # Import Decision Tree Classifier
from sklearn import metrics #Import scikit-learn metrics module for accuracy calculation

# Hold out 30% of the (already label-encoded) testing frame and fit a tree.
X = new_df.drop('class_', axis='columns').values
y = new_df['class_']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=1) # 70% training and 30% test
tree_model = DecisionTreeClassifier()
tree_model = tree_model.fit(X_train, y_train)
y_pred = tree_model.predict(X_test)
# Fraction of held-out samples classified correctly.
print("DecisionTree Accuracy:",metrics.accuracy_score(y_test, y_pred))
DecisionTree Accuracy: 0.812962962962963
training
# Print the name of every string-typed column in the training frame
# (only class_ qualifies).
for col_name, col_values in training_data_store.items():
    if pd.api.types.is_string_dtype(col_values):
        print(col_name)
class_
# Convert the training frame's string columns to ordered categoricals, then
# replace class_ with its integer codes.
for col_name, col_values in training_data_store.items():
    if pd.api.types.is_string_dtype(col_values):
        training_data_store[col_name] = col_values.astype('category').cat.as_ordered()
training_data_store['class_'] = training_data_store['class_'].cat.codes
# Build a cleaned working copy of the training frame. The original's
# pd.concat([df], axis=1).dropna().assign() chain was a no-op wrapper around a
# cleaned copy, simplified here.
new_df = training_data_store.dropna().copy()
X = new_df.drop('class_', axis='columns').values
y = new_df['class_'].values
# Fit the softmax classifier on the full training frame and show its loss curve.
model = Multiclass()
model.fit(X, y)
model.loss_plot();
model.predict(X)
array([1, 3, 3, ..., 2, 2, 2], dtype=int64)
y
array([2, 2, 2, ..., 0, 0, 0], dtype=int8)
model.predict(X) == y
array([False, False, False, ..., False, False, False])
# Softmax accuracy on (X_train, y_train), replacing the manual append/count
# loop with a vectorised mean; the same three values are printed.
# NOTE(review): X_train/y_train here still come from the most recent
# train_test_split (the 2520-row split of the 3600-row testing frame), while
# `model` was just fit on the training frame — confirm which data this score
# was meant to measure.
matches = np.asarray(model.predict(X_train) == y_train)
correct = int(matches.sum())
print(correct)
print(matches.size)
score = correct / matches.size
print(f'Softmax Score for training data: {score}')
748 2520 Softmax Score for training data: 0.2968253968253968
from sklearn.tree import DecisionTreeClassifier # Import Decision Tree Classifier
from sklearn import metrics #Import scikit-learn metrics module for accuracy calculation

# Hold out 30% of the training frame and fit a decision tree on the rest.
X = new_df.drop('class_', axis='columns').values
y = new_df['class_']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=1) # 70% training and 30% test
tree_model = DecisionTreeClassifier()
tree_model = tree_model.fit(X_train, y_train)
y_pred = tree_model.predict(X_test)
# Fraction of held-out samples classified correctly.
print("DecisionTree Accuracy:",metrics.accuracy_score(y_test, y_pred))
DecisionTree Accuracy: 0.8305908750934929
Answer 10: I used softmax for the training and testing data sets. At first I used model.fit(X, y) for both data sets. Then, I used model.fit(X_train, y_train) to improve my accuracy. For the training data set my test_size = 0.3; for the testing data set, my test_size = 0.1.
Score for testing data
Softmax Accuracy: 0.4363888888888889
DecisionTree Accuracy: 0.812962962962963
Score for training data
Softmax Accuracy: 0.2968253968253968
DecisionTree Accuracy: 0.8305908750934929
Q11: Change the number of features for your classification. How does the performance change with the choice of the features? Use your results from preprocessing and correlation.
from sklearn.tree import DecisionTreeClassifier # Import Decision Tree Classifier
from sklearn import metrics #Import scikit-learn metrics module for accuracy calculation

# Q11 (training frame): drop the accelerometer channels and classify using the
# three gyroscope features only.
new_df = training_data_store.dropna().copy()
X = new_df.drop(['class_', 'ax', 'ay', 'az'], axis='columns').values
y = new_df['class_'].values
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=1) # 70% training and 30% test
gyro_tree = DecisionTreeClassifier()
gyro_tree = gyro_tree.fit(X_train, y_train)
y_pred = gyro_tree.predict(X_test)
# Accuracy with gyroscope-only features.
print("DecisionTree Accuracy:",metrics.accuracy_score(y_test, y_pred))
DecisionTree Accuracy: 0.56245325355273
from sklearn.tree import DecisionTreeClassifier # Import Decision Tree Classifier
from sklearn import metrics #Import scikit-learn metrics module for accuracy calculation

# Q11 (testing frame): same gyroscope-only experiment on the testing data.
new_df = testing_data_store.dropna().copy()
X = new_df.drop(['class_', 'ax', 'ay', 'az'], axis='columns').values
y = new_df['class_'].values
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=1) # 70% training and 30% test
gyro_tree = DecisionTreeClassifier()
gyro_tree = gyro_tree.fit(X_train, y_train)
y_pred = gyro_tree.predict(X_test)
# Accuracy with gyroscope-only features.
print("DecisionTree Accuracy:",metrics.accuracy_score(y_test, y_pred))
DecisionTree Accuracy: 0.5916666666666667
Answer 11: As shown in the two cells above, I used only the 3 gyroscope features ['gx', 'gy', 'gz'] for both the training and testing data sets. After dropping ['ax', 'ay', 'az'], the accuracy score decreased for both data sets.
Training data
Before dropped (['ax', 'ay', 'az']) DecisionTree Accuracy: 0.8305908750934929
After dropped (['ax', 'ay', 'az']) DecisionTree Accuracy: 0.56245325355273
Testing data
Before dropped (['ax', 'ay', 'az']) DecisionTree Accuracy: 0.812962962962963
After dropped (['ax', 'ay', 'az']) DecisionTree Accuracy: 0.5916666666666667
Train, Validate and Test
¶Q12:Present the result for the two test scenarios. Reflect over the result, is it expected, reasonable etc.?
def read_all_files(accelerometer, gyroscope, clas):
    """Join the accelerometer and gyroscope frames column-wise, drop rows with
    any missing values, and tag every remaining row with the class label."""
    combined = pd.concat([accelerometer, gyroscope], axis=1)
    return combined.dropna().assign(class_=clas)
# Q12: load the single recording that contains all movements in sequence.
# class_ is set to None — this recording carries no ground-truth labels.
all_movement_1 = read_all_files(all_in_one_1_accelerometer, all_in_one_1_gyroscope, None)
X_ = all_movement_1.drop(['class_'], axis='columns')
y = all_movement_1['class_']  # all-None: no true labels exist for this file
# y = all_movement_1['class_'].cat.codes
# # testing_data_store['class_'].cat.codesall_in_one_1_accelerometer_cut
X_.plot();
# y
# NOTE(review): y_train shown below is left over from the earlier
# train_test_split, not derived from this recording.
y_train
array([2, 0, 0, ..., 3, 2, 3], dtype=int8)
X_
ax | ay | az | gx | gy | gz | |
---|---|---|---|---|---|---|
0 | -4.365023 | -7.358100 | 5.076733 | 0.008395 | 0.038031 | 0.006039 |
1 | -4.212665 | -7.304776 | 5.026167 | -0.107093 | -0.042987 | -0.018553 |
2 | -4.316746 | -7.281084 | 5.050798 | -0.131929 | -0.060144 | -0.038170 |
3 | -4.555965 | -7.415896 | 5.017136 | -0.162665 | -0.092153 | -0.052569 |
4 | -4.602786 | -7.428959 | 5.099611 | -0.220889 | -0.126659 | -0.059306 |
... | ... | ... | ... | ... | ... | ... |
2954 | -4.530768 | -6.012664 | 6.846249 | -0.305258 | -0.123342 | -0.004782 |
2955 | -4.778032 | -6.198387 | 6.809713 | -0.264557 | -0.106919 | 0.009669 |
2956 | -5.042572 | -6.267522 | 6.628271 | -0.213454 | -0.062535 | 0.047508 |
2957 | -4.865793 | -6.207619 | 5.935216 | -0.225060 | 0.018658 | 0.142541 |
2958 | -4.790702 | -6.241071 | 5.859234 | -0.237452 | 0.035273 | 0.175510 |
2959 rows × 6 columns
# Rebuild a cleaned copy of the training frame; the former
# pd.concat([df], axis=1).dropna().assign() chain was an obfuscated copy.
new_df = training_data_store.dropna().copy()
new_df
ax | ay | az | gx | gy | gz | class_ | |
---|---|---|---|---|---|---|---|
0 | 3.536667 | -8.764157 | 8.219295 | -0.426663 | 0.376293 | 0.468237 | 2 |
1 | 3.972804 | -12.131095 | 11.188315 | -0.133465 | -0.126467 | 0.777247 | 2 |
2 | 3.418738 | -14.802985 | 12.352308 | 0.195966 | -0.441429 | 0.986390 | 2 |
3 | 1.820551 | -16.844654 | 8.020269 | 0.370743 | -0.536392 | 1.180035 | 2 |
4 | 0.771038 | -16.254522 | 3.320959 | -0.066549 | -0.279340 | 1.371567 | 2 |
... | ... | ... | ... | ... | ... | ... | ... |
17820 | -4.225421 | -7.347326 | 2.834419 | 1.315053 | 0.989951 | 0.788295 | 0 |
17821 | -3.888998 | -6.893213 | 2.058209 | 1.380154 | 1.288926 | 0.862489 | 0 |
17822 | -4.683547 | -6.589226 | 1.510789 | 1.444906 | 1.497039 | 0.967244 | 0 |
17823 | -5.238657 | -6.579218 | 0.651558 | 1.428796 | 1.569016 | 1.079469 | 0 |
17824 | -5.052723 | -6.786039 | -0.916491 | 1.325403 | 1.399684 | 1.295959 | 0 |
17825 rows × 7 columns
# Q12: 4-nearest-neighbour classifier fit on a 70% split of the training frame,
# then applied to the unlabeled all-movement recording X_.
n_neighbors = 4
new_df = pd.concat([training_data_store], axis=1).dropna().assign()
X = new_df.drop(['class_'], axis='columns')
y = new_df['class_']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=1) # 70% training and 30% test
knn = KNeighborsClassifier(n_neighbors)
knn.fit(X_train, y_train)
y_pred = knn.predict(X_)
# y_true=y_pred
print(len(y_pred))
print(len(y_test[:2959]))
# NOTE(review): this confusion matrix compares y_test[:2959] — shuffled
# held-out labels of the training frame — against predictions made on the
# completely different all_movement_1 rows. The two vectors only share length,
# not sample identity, so the matrix does not measure prediction quality; the
# all-movement recording would need its own ground-truth labels for that.
cm= confusion_matrix(y_test[:2959], y_pred)
f, ax =plt.subplots(figsize = (5,5))
sns.heatmap(cm,annot = True, linewidths= 0.5, linecolor="red", fmt=".0f", ax=ax);
plt.xlabel("y_pred");
plt.ylabel("y_true");
2959 2959
Answer 12: I used the file with all classes included. The total length is 2959.
The per-class recall (diagonal count divided by the row total) from the confusion matrix is:
Class 0: 185 / (185 + 240 + 196 + 161) = 0.237
Class 1: 207 / (164 + 207 + 206 + 149) = 0.285
Class 2: 172 / (180 + 224 + 172 + 164) = 0.232
Class 3: 149 / (165 + 236 + 161 + 149) = 0.210
Class 1 has the best per-class recall of the four.