[Machine Learning Practice] AI Innovation and Smart Factories in the Manufacturing/IoT Industry
오기오기
2021. 11. 24. 09:27
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
plt.style.use('fivethirtyeight')
# to avoid warnings
import warnings
warnings.filterwarnings("ignore", category=DeprecationWarning)
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import xgboost as xgb
from xgboost.sklearn import XGBClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import confusion_matrix, accuracy_score, f1_score, precision_score, recall_score, average_precision_score
from elice_utils import EliceUtils
elice_utils = EliceUtils()
def preprocess():
    # print("Reading data...")
    data = pd.read_csv('data/uci-secom.csv')
    # print(data.isnull().sum())
    # print("Handling missing values.")
    data = data.replace(np.NaN, 0)
    # print(data.isnull().sum())
    # Drop the column we don't need
    data = data.drop(columns = ['Time'], axis = 1)
    # Separate the target column
    x = data.iloc[:, :590]
    y = data.iloc[:, 590]
    # print("Splitting into training and test data.")
    # Perform undersampling
    failed_tests = np.array(data[data['Pass/Fail'] == 1].index)
    no_failed_tests = len(failed_tests)
    # Sample from the indices of the normal rows (not from a plain 0..n range)
    # and without replacement, so the normal subset contains no failures or duplicates
    normal_indices = np.array(data[data['Pass/Fail'] == -1].index)
    np.random.seed(10)
    random_normal_indices = np.random.choice(normal_indices, size = no_failed_tests, replace = False)
    under_sample = np.concatenate([failed_tests, random_normal_indices])
    undersample_data = data.loc[under_sample, :]
    x = undersample_data.iloc[:, undersample_data.columns != 'Pass/Fail']
    y = undersample_data.iloc[:, undersample_data.columns == 'Pass/Fail']
    y = np.ravel(y)
    x_train_us, x_test_us, y_train_us, y_test_us = train_test_split(x, y, test_size = 0.2, random_state = 4)
    # print("Training data shape: {}".format(x_train_us.shape))
    # print("Test data shape: {}".format(x_test_us.shape))
    # Also return the full DataFrame so predict() can pull a single row from it
    return x_train_us, x_test_us, y_train_us, y_test_us, data
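As a quick sanity check on the undersampling, it helps to look at the class balance of the returned split. The snippet below is just an illustration built on the preprocess() function above; it is not part of the graded exercise.

# Illustrative check of the undersampled split
x_train_us, x_test_us, y_train_us, y_test_us, _ = preprocess()
print(pd.Series(y_train_us).value_counts())   # roughly equal counts of -1 and 1
print('train:', x_train_us.shape, 'test:', x_test_us.shape)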
def train(x_train_us, y_train_us):
    """
    # print("Training the model.")
    model = XGBClassifier(random_state=2)
    # Hyperparameter tuning
    parameters = [{'max_depth' : [1, 2, 3, 4, 5, 6]}]
    grid_search = GridSearchCV(estimator = model, param_grid = parameters, scoring = 'recall', cv = 4, n_jobs = -1)
    grid_search = grid_search.fit(x_train_us, y_train_us)
    # Select the best model
    model = grid_search.best_estimator_
    """
    # Load the pretrained model instead of re-running the grid search
    import pickle
    with open('model.pkl', 'rb') as f:
        model = pickle.load(f)
    return model
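The grid search in train() is left commented out because the course environment ships a pretrained model.pkl and simply loads it. How that file was actually produced is not documented; a plausible recipe is to run the same search once and pickle the best estimator, as sketched below. Recent xgboost releases reject -1/1 labels and require 0/1, so the target is remapped here; the fault class stays encoded as 1, which is what the rest of the script checks for.

# Hypothetical recipe for creating model.pkl (an assumption, not the course's documented procedure)
import pickle

def build_model_pkl(x_train_us, y_train_us, path='model.pkl'):
    # Map the -1/1 target to 0/1 for recent xgboost versions (the fault class stays 1)
    y01 = np.where(np.asarray(y_train_us) == 1, 1, 0)
    grid_search = GridSearchCV(estimator = XGBClassifier(random_state=2),
                               param_grid = [{'max_depth' : [1, 2, 3, 4, 5, 6]}],
                               scoring = 'recall', cv = 4, n_jobs = -1)
    grid_search.fit(x_train_us, y01)
    with open(path, 'wb') as f:
        pickle.dump(grid_search.best_estimator_, f)
    return grid_search.best_estimator_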
def evaluation():
    x_train_us, x_test_us, y_train_us, y_test_us, _ = preprocess()
    model = train(x_train_us, y_train_us)
    # Predict on the test data
    y_pred = model.predict(x_test_us)
    print('Evaluation metric (recall score): ', recall_score(y_test_us, y_pred), '\n')
    print("Plotting the sensor importances.")
    # Set the figure size before plotting so it actually takes effect
    plt.rcParams['figure.figsize'] = (10, 15)
    xgb.plot_importance(model, height = 1, grid = True, importance_type = 'gain', show_values = False, max_num_features = 20)
    plt.xlabel('The importance score for each feature')
    plt.ylabel('Features')
    plt.savefig("result1.png")
    elice_utils.send_image("result1.png")
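Recall is the metric the exercise grades on, but the imports at the top also pull in confusion_matrix, accuracy, precision, and F1, which evaluation() never touches. For a fuller picture on the same split, a snippet along these lines works (purely illustrative, reusing the functions above).

# Optional extra metrics on the same undersampled split (illustrative)
x_train_us, x_test_us, y_train_us, y_test_us, _ = preprocess()
model = train(x_train_us, y_train_us)
y_pred = model.predict(x_test_us)
print(confusion_matrix(y_test_us, y_pred))               # label order follows the sorted class labels
print('accuracy :', accuracy_score(y_test_us, y_pred))
print('precision:', precision_score(y_test_us, y_pred))  # pos_label defaults to 1 (fault)
print('recall   :', recall_score(y_test_us, y_pred))
print('f1       :', f1_score(y_test_us, y_pred))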
def predict(value_103_sensor):
    x_train_us, x_test_us, y_train_us, y_test_us, data = preprocess()
    model = train(x_train_us, y_train_us)
    # Work on a copy so the original DataFrame is not modified
    pre_data = data.copy()
    pre_data.loc[1242, '103'] = value_103_sensor
    pre_data = pre_data[1242:1243]
    prediction = model.predict(pre_data.drop(columns = 'Pass/Fail'))
    fig, ax = plt.subplots(figsize=(8, 6))
    # distplot is deprecated in recent seaborn releases; histplot(..., kde=True) is the replacement
    sns.distplot(data['103'], color = 'darkblue')
    plt.title('103 Sensor Measurements', fontsize = 20)
    ax.annotate('103_sensor_value',
                xy=(value_103_sensor, 0), xycoords='data',
                xytext=(10, 30), textcoords='offset points',
                arrowprops=dict(facecolor='black', shrink=0.05),
                horizontalalignment='left', verticalalignment='bottom')
    plt.savefig("result1.png")
    elice_utils.send_image("result1.png")
    if prediction[0] == 1:
        print("With a sensor 103 value of {}, a process fault is predicted to occur.".format(value_103_sensor))
    else:
        print("With a sensor 103 value of {}, no process fault is predicted to occur.".format(value_103_sensor))