In [1]:
# Jupyter Notebook environment setup (display styling)
from IPython.core.display import display, HTML
display(HTML(
"""<style>
div.container { width:100% !important; }
div.CodeMirror { font-family: Consolas; font-size: 16pt;}
div.output {font-size: 16pt; font-weight: bold; }
div.input {font-family: Consolas; font-size: 16pt; }
div.prompt { min-width: 100px;}
</style>
"""))
Logistic Regression
In [2]:
# Logistic regression
# Multi-class classification
# Sigmoid function
# Softmax function
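The sigmoid squashes a single score z into a probability between 0 and 1, while the softmax turns a vector of scores into probabilities that sum to 1 across several classes. A minimal NumPy sketch of both, for reference only (these helper functions are not part of the notebook's own code):

import numpy as np

def sigmoid(z):
    # map any real number to the (0, 1) range
    return 1 / (1 + np.exp(-z))

def softmax(z):
    # subtract the max for numerical stability, then normalize to probabilities
    e = np.exp(z - np.max(z))
    return e / e.sum()

print(sigmoid(0.0))                        # 0.5
print(softmax(np.array([2.0, 1.0, 0.1])))  # values sum to 1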
In [3]:
# Predicting class probabilities in a classification problem
# Probabilities for the 7 fish species
In [4]:
import pandas as pd
In [6]:
fish = pd.read_csv("fish.csv")
In [7]:
fish.head()
Out[7]:
|   | Species | Weight | Length | Diagonal | Height | Width |
|---|---------|--------|--------|----------|--------|-------|
| 0 | Bream | 242.0 | 25.4 | 30.0 | 11.5200 | 4.0200 |
| 1 | Bream | 290.0 | 26.3 | 31.2 | 12.4800 | 4.3056 |
| 2 | Bream | 340.0 | 26.5 | 31.1 | 12.3778 | 4.6961 |
| 3 | Bream | 363.0 | 29.0 | 33.5 | 12.7300 | 4.4555 |
| 4 | Bream | 430.0 | 29.0 | 34.0 | 12.4440 | 5.1340 |
In [8]:
print(pd.unique(fish['Species']))
['Bream' 'Roach' 'Whitefish' 'Parkki' 'Perch' 'Pike' 'Smelt']
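Beyond the list of unique species, it helps to know how many samples each species has. A quick check with pandas (a sketch, not a cell in the original notebook):

print(fish['Species'].value_counts())  # number of samples per species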
In [12]:
fish_input = fish[['Weight', 'Length', 'Diagonal', 'Height', 'Width']].to_numpy()
In [13]:
print(fish_input[:5])
[[242.      25.4     30.      11.52     4.02  ]
 [290.      26.3     31.2     12.48     4.3056]
 [340.      26.5     31.1     12.3778   4.6961]
 [363.      29.      33.5     12.73     4.4555]
 [430.      29.      34.      12.444    5.134 ]]
In [11]:
fish_target = fish['Species'].to_numpy()
In [14]:
from sklearn.model_selection import train_test_split
In [15]:
train_input, test_input, train_target, test_target = train_test_split(fish_input, fish_target, random_state=42)
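By default train_test_split holds out 25% of the rows for testing. With 7 unevenly sized classes, passing stratify could keep the species proportions the same in both splits; a sketch of that variant (an assumption about an alternative call, not what this notebook does):

from sklearn.model_selection import train_test_split
# stratify=fish_target keeps the class proportions equal in the train and test sets
train_input, test_input, train_target, test_target = train_test_split(
    fish_input, fish_target, stratify=fish_target, random_state=42)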
In [16]:
from sklearn.preprocessing import StandardScaler
ss = StandardScaler()
ss.fit(train_input)
train_scaled = ss.transform(train_input)
test_scaled = ss.transform(test_input)
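StandardScaler subtracts each column's training mean and divides by its training standard deviation, and the test set is transformed with the training statistics. A quick sanity check (a sketch, not in the original notebook) that the scaled training columns are roughly zero-mean, unit-variance:

import numpy as np
print(np.round(train_scaled.mean(axis=0), 4))  # approximately all zeros
print(np.round(train_scaled.std(axis=0), 4))   # approximately all ones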
In [17]:
from sklearn.neighbors import KNeighborsClassifier
In [18]:
kn = KNeighborsClassifier(n_neighbors=3)
kn.fit(train_scaled, train_target)
print(kn.score(train_scaled, train_target))
print(kn.score(test_scaled, test_target))
0.8907563025210085
0.85
In [19]:
print(kn.classes_)
['Bream' 'Parkki' 'Perch' 'Pike' 'Roach' 'Smelt' 'Whitefish']
In [22]:
print(kn.predict(test_scaled[:5]))
['Perch' 'Smelt' 'Pike' 'Perch' 'Perch']
In [23]:
import numpy as np
proba = kn.predict_proba(test_scaled[:5])  # returns the probability of each class
print(np.round(proba, decimals=4))
[[0.     0.     1.     0.     0.     0.     0.    ]
 [0.     0.     0.     0.     0.     1.     0.    ]
 [0.     0.     0.     1.     0.     0.     0.    ]
 [0.     0.     0.6667 0.     0.3333 0.     0.    ]
 [0.     0.     0.6667 0.     0.3333 0.     0.    ]]
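The columns of predict_proba follow the order of kn.classes_, and with n_neighbors=3 the only possible probabilities are 0, 1/3, 2/3, and 1. A small sketch (not in the original notebook) that labels the columns with the class names, assuming proba and kn from the cells above:

import numpy as np
import pandas as pd
proba_df = pd.DataFrame(np.round(proba, 4), columns=kn.classes_)  # one column per species
print(proba_df)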
In [24]:
distances, indexes = kn.kneighbors(test_scaled[3:4])
print(train_target[indexes])
[['Roach' 'Perch' 'Perch']]
In [25]:
# Logistic regression: a classification model
# z = a * (Weight) + b * (Length) + c * (Diagonal) + d * (Height) + e * (Width) + f
# a, b, c, d, e: coefficients, f: intercept
# Same form as a multiple linear regression equation
In [28]:
import numpy as np
import matplotlib.pyplot as plt
In [29]:
z = np.arange(-5, 5, 0.1)
phi = 1 / (1+np.exp(-z))
In [32]:
plt.plot(z, phi)
plt.xlabel('z')
plt.ylabel('phi')
plt.show()
In [33]:
char_array = np.array(['A', 'B','C','D','F'])
print(char_array[[True, False, True, False, False]])
['A' 'C']
In [34]:
bream_smelt_indexes = (train_target == 'Bream') | (train_target == 'Smelt')
train_bream_smelt = train_scaled[bream_smelt_indexes]
target_bream_smelt = train_target[bream_smelt_indexes]
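The boolean mask keeps only the Bream and Smelt rows from the scaled training set. A quick check (a sketch, not in the original notebook) of how many rows of each class were selected:

import numpy as np
classes, counts = np.unique(target_bream_smelt, return_counts=True)
print(dict(zip(classes, counts)))  # number of Bream and Smelt samples in the subset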
In [35]:
print(train_bream_smelt)
[[ 0.91965782  0.60943175  0.81041221  1.85194896  1.00075672]
 [-1.0858536  -1.68646987 -1.70848587 -1.70159849 -2.0044758 ]
 [ 0.63818253  0.56257661  0.73223951  1.64473401  0.50705737]
 [ 0.30041219  0.23459067  0.42823457  1.36042157  0.22329758]
 [ 0.9027693   0.70314202  0.88858491  1.89027545  0.85537174]
 [-1.0824759  -1.61150165 -1.62162731 -1.7000674  -1.92815631]
 [ 0.10337949  0.04717013  0.23714575  0.88445197  0.41799764]
 [ 1.49668216  1.03112796  1.21864741  2.44274986  1.40289707]
 [ 0.23004337  0.23459067  0.42823457  1.3336029   0.39983213]
 [-0.42579405 -0.29018684 -0.11028847  0.65627104 -0.26107519]
 [ 1.28557569  0.70314202  0.89727076  1.98228866  1.06683526]
 [ 0.93373158  0.60943175  0.83646978  1.85150445  0.97832415]
 [ 0.80706771  0.60943175  0.81041221  1.63137406  1.0255057 ]
 [-1.07262426 -1.52716241 -1.55214047 -1.67235972 -1.86207776]
 [ 1.6374198   1.17169337  1.27076255  2.41341232  1.40143407]
 [-1.07966115 -1.63961473 -1.67374245 -1.6462819  -1.87036806]
 [-1.05151362 -1.33037084 -1.39579507 -1.47914678 -1.45146425]
 [ 0.15967454  0.11276732  0.29794674  1.17051775  0.29205828]
 [ 1.56705098  1.17169337  1.32287768  2.16352457  1.17174409]
 [-0.00921063  0.1408804   0.3240043   0.94026245  0.14734384]
 [ 0.15967454  0.1408804   0.33269016  1.26991474  0.24109734]
 [-0.14994827  0.32830094  0.52377898  1.24719543  0.3806913 ]
 [-0.08520896  0.04717013  0.19371647  0.95507939  0.00439718]
 [ 0.86336276  0.42201121  0.62800925  1.5003429   0.45670601]
 [ 0.30041219  0.11276732  0.28057503  1.31303204  0.50608204]
 [-1.07262426 -1.45219419 -1.51739705 -1.62627903 -1.94607798]
 [ 0.82114147  0.51572148  0.68881023  1.76102232  0.5621025 ]
 [-1.07937967 -1.54590446 -1.56951218 -1.64420753 -2.01154694]
 [-1.07937967 -1.62087268 -1.63899902 -1.67413775 -1.93303295]
 [-0.29068592 -0.2058476  -0.0060582   0.89334213 -0.08697896]
 [-1.05095067 -1.26477365 -1.30893652 -1.46447801 -1.56606541]
 [ 0.51151865  0.51572148  0.7148678   1.54738669  0.68347   ]
 [ 0.58188748  0.32830094  0.51509312  1.50439286  0.4404302 ]]
In [36]:
from sklearn.linear_model import LogisticRegression
In [37]:
lr = LogisticRegression()
lr.fit(train_bream_smelt, target_bream_smelt)
Out[37]:
LogisticRegression()
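LogisticRegression applies L2 regularization by default; its strength is controlled by C (a larger C means weaker regularization) and the solver runs for at most max_iter iterations. The cell above keeps the defaults; a sketch that spells them out (values are scikit-learn's documented defaults, not tuned for this data):

from sklearn.linear_model import LogisticRegression
lr = LogisticRegression(C=1.0, max_iter=100)  # scikit-learn defaults, written out explicitly
lr.fit(train_bream_smelt, target_bream_smelt)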
In [38]:
print(lr.predict(train_bream_smelt[:5]))
['Bream' 'Smelt' 'Bream' 'Bream' 'Bream']
In [39]:
print(lr.predict_proba(train_bream_smelt[:5]))
[[0.99759855 0.00240145]
 [0.02735183 0.97264817]
 [0.99486072 0.00513928]
 [0.98584202 0.01415798]
 [0.99767269 0.00232731]]
In [40]:
print(lr.classes_)
['Bream' 'Smelt']
In [41]:
print(lr.coef_, lr.intercept_)
[[-0.4037798 -0.57620209 -0.66280298 -1.01290277 -0.73168947]] [-2.16155132]
In [42]:
# z = -0.404 * (Weight) - 0.576 * (Length) - ... - 2.161
decisions = lr.decision_function(train_bream_smelt[:5])
print(decisions)
[-6.02927744 3.57123907 -5.26568906 -4.24321775 -6.0607117 ]
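decision_function returns the raw z values from the linear equation above, so they can be reproduced by hand from coef_ and intercept_. A short check (a sketch, assuming the fitted lr and train_bream_smelt from the cells above):

# z = X @ coef + intercept, element-wise the same as decision_function
z_manual = train_bream_smelt[:5] @ lr.coef_[0] + lr.intercept_[0]
print(z_manual)  # should match the decision_function output above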
In [43]:
# scipy: expit() is the sigmoid function
from scipy.special import expit
In [44]:
print(expit(decisions))
[0.00240145 0.97264817 0.00513928 0.01415798 0.00232731]
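expit is scipy's sigmoid, so applying it to the z values gives the probability of the positive class, which is the second column of predict_proba ('Smelt', since classes_ is ordered alphabetically). A quick comparison (a sketch using the fitted lr from above):

# probability of the second class ('Smelt'); same values as expit(decisions)
print(lr.predict_proba(train_bream_smelt[:5])[:, 1])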