In [1]:
import numpy as np
import matplotlib.pyplot as plt
r = np.random.RandomState(10)
x = 10 * r.rand(100)
y = 2 * x - 3 * r.rand(100)
plt.scatter(x,y)
Out[1]:
<matplotlib.collections.PathCollection at 0x7f23e7857850>
In [6]:
x  # 1-D array of shape (100,)
x.shape
Out[6]:
array([7.71320643, 0.20751949, 6.33648235, 7.48803883, 4.98507012,
2.24796646, 1.98062865, 7.60530712, 1.69110837, 0.88339814,
6.85359818, 9.53393346, 0.03948266, 5.12192263, 8.12620962,
6.12526067, 7.21755317, 2.91876068, 9.17774123, 7.14575783,
5.42544368, 1.42170048, 3.7334076 , 6.74133615, 4.41833174,
4.34013993, 6.17766978, 5.13138243, 6.50397182, 6.01038953,
8.05223197, 5.21647152, 9.08648881, 3.19236089, 0.90459349,
3.00700057, 1.13984362, 8.28681326, 0.46896319, 6.26287148,
5.47586156, 8.19286996, 1.9894754 , 8.56850302, 3.51652639,
7.54647692, 2.95961707, 8.8393648 , 3.25511638, 1.65015898,
3.92529244, 0.93460375, 8.21105658, 1.5115202 , 3.84114449,
9.44260712, 9.87625475, 4.56304547, 8.26122844, 2.51374134,
5.97371648, 9.0283176 , 5.34557949, 5.90201363, 0.39281767,
3.57181759, 0.7961309 , 3.05459918, 3.30719312, 7.73830296,
0.39959209, 4.29492178, 3.14926872, 6.36491143, 3.4634715 ,
0.43097356, 8.79915175, 7.63240587, 8.78096643, 4.17509144,
6.05577564, 5.13466627, 5.97836648, 2.62215661, 3.00871309,
0.25399782, 3.03062561, 2.42075875, 5.57578189, 5.6550702 ,
4.75132247, 2.92797976, 0.64251061, 9.78819146, 3.39707844,
4.95048631, 9.77080726, 4.40773825, 3.18272805, 5.19796986])
In [3]:
y.shape
Out[3]:
(100,)
In [5]:
from sklearn.linear_model import LinearRegression
model = LinearRegression()
model
Out[5]:
LinearRegression()
In [7]:
# ! Error: x must be passed as a (100, 1) matrix
model.fit(x, y)
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
/tmp/ipykernel_296/352589828.py in <module>
1 # ! Error occurs
----> 2 model.fit(x, y)
/opt/conda/lib/python3.9/site-packages/sklearn/linear_model/_base.py in fit(self, X, y, sample_weight)
659 accept_sparse = False if self.positive else ["csr", "csc", "coo"]
660
--> 661 X, y = self._validate_data(
662 X, y, accept_sparse=accept_sparse, y_numeric=True, multi_output=True
663 )
/opt/conda/lib/python3.9/site-packages/sklearn/base.py in _validate_data(self, X, y, reset, validate_separately, **check_params)
570 y = check_array(y, **check_y_params)
571 else:
--> 572 X, y = check_X_y(X, y, **check_params)
573 out = X, y
574
/opt/conda/lib/python3.9/site-packages/sklearn/utils/validation.py in check_X_y(X, y, accept_sparse, accept_large_sparse, dtype, order, copy, force_all_finite, ensure_2d, allow_nd, multi_output, ensure_min_samples, ensure_min_features, y_numeric, estimator)
954 raise ValueError("y cannot be None")
955
--> 956 X = check_array(
957 X,
958 accept_sparse=accept_sparse,
/opt/conda/lib/python3.9/site-packages/sklearn/utils/validation.py in check_array(array, accept_sparse, accept_large_sparse, dtype, order, copy, force_all_finite, ensure_2d, allow_nd, ensure_min_samples, ensure_min_features, estimator)
759 # If input is 1D raise error
760 if array.ndim == 1:
--> 761 raise ValueError(
762 "Expected 2D array, got 1D array instead:\narray={}.\n"
763 "Reshape your data either using array.reshape(-1, 1) if "
ValueError: Expected 2D array, got 1D array instead:
array=[7.71320643 0.20751949 6.33648235 7.48803883 4.98507012 2.24796646
1.98062865 7.60530712 1.69110837 0.88339814 6.85359818 9.53393346
0.03948266 5.12192263 8.12620962 6.12526067 7.21755317 2.91876068
9.17774123 7.14575783 5.42544368 1.42170048 3.7334076 6.74133615
4.41833174 4.34013993 6.17766978 5.13138243 6.50397182 6.01038953
8.05223197 5.21647152 9.08648881 3.19236089 0.90459349 3.00700057
1.13984362 8.28681326 0.46896319 6.26287148 5.47586156 8.19286996
1.9894754 8.56850302 3.51652639 7.54647692 2.95961707 8.8393648
3.25511638 1.65015898 3.92529244 0.93460375 8.21105658 1.5115202
3.84114449 9.44260712 9.87625475 4.56304547 8.26122844 2.51374134
5.97371648 9.0283176 5.34557949 5.90201363 0.39281767 3.57181759
0.7961309 3.05459918 3.30719312 7.73830296 0.39959209 4.29492178
3.14926872 6.36491143 3.4634715 0.43097356 8.79915175 7.63240587
8.78096643 4.17509144 6.05577564 5.13466627 5.97836648 2.62215661
3.00871309 0.25399782 3.03062561 2.42075875 5.57578189 5.6550702
4.75132247 2.92797976 0.64251061 9.78819146 3.39707844 4.95048631
9.77080726 4.40773825 3.18272805 5.19796986].
Reshape your data either using array.reshape(-1, 1) if your data has a single feature or array.reshape(1, -1) if it contains a single sample.
In [11]:
X = x.reshape(100,1)  # reshape x into a (100, 1) column vector
X
Out[11]:
array([[7.71320643],
[0.20751949],
[6.33648235],
[7.48803883],
[4.98507012],
[2.24796646],
[1.98062865],
[7.60530712],
[1.69110837],
[0.88339814],
[6.85359818],
[9.53393346],
[0.03948266],
[5.12192263],
[8.12620962],
[6.12526067],
[7.21755317],
[2.91876068],
[9.17774123],
[7.14575783],
[5.42544368],
[1.42170048],
[3.7334076 ],
[6.74133615],
[4.41833174],
[4.34013993],
[6.17766978],
[5.13138243],
[6.50397182],
[6.01038953],
[8.05223197],
[5.21647152],
[9.08648881],
[3.19236089],
[0.90459349],
[3.00700057],
[1.13984362],
[8.28681326],
[0.46896319],
[6.26287148],
[5.47586156],
[8.19286996],
[1.9894754 ],
[8.56850302],
[3.51652639],
[7.54647692],
[2.95961707],
[8.8393648 ],
[3.25511638],
[1.65015898],
[3.92529244],
[0.93460375],
[8.21105658],
[1.5115202 ],
[3.84114449],
[9.44260712],
[9.87625475],
[4.56304547],
[8.26122844],
[2.51374134],
[5.97371648],
[9.0283176 ],
[5.34557949],
[5.90201363],
[0.39281767],
[3.57181759],
[0.7961309 ],
[3.05459918],
[3.30719312],
[7.73830296],
[0.39959209],
[4.29492178],
[3.14926872],
[6.36491143],
[3.4634715 ],
[0.43097356],
[8.79915175],
[7.63240587],
[8.78096643],
[4.17509144],
[6.05577564],
[5.13466627],
[5.97836648],
[2.62215661],
[3.00871309],
[0.25399782],
[3.03062561],
[2.42075875],
[5.57578189],
[5.6550702 ],
[4.75132247],
[2.92797976],
[0.64251061],
[9.78819146],
[3.39707844],
[4.95048631],
[9.77080726],
[4.40773825],
[3.18272805],
[5.19796986]])
In [12]:
model.fit(X,y)  # fit with X of shape (100, 1) and y of shape (100,)
Out[12]:
LinearRegression()
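Since the data were generated as y = 2 * x - 3 * r.rand(100), the fitted parameters should roughly recover that rule. A quick sanity check (an editor's sketch, not part of the original run):

print(model.coef_)       # slope: close to 2
print(model.intercept_)  # intercept: close to -1.5, since 3 * rand has mean 1.5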
In [32]:
# Build the regression line
x_new = np.linspace(-1, 11, 100)  # generate new inputs: (start, stop, number of points)
X_new = x_new.reshape(100,1)      # reshape the new inputs to a column vector
y_new = model.predict(X_new)      # model predictions for the new inputs
x_new
Out[32]:
array([-1. , -0.87878788, -0.75757576, -0.63636364, -0.51515152,
-0.39393939, -0.27272727, -0.15151515, -0.03030303, 0.09090909,
0.21212121, 0.33333333, 0.45454545, 0.57575758, 0.6969697 ,
0.81818182, 0.93939394, 1.06060606, 1.18181818, 1.3030303 ,
1.42424242, 1.54545455, 1.66666667, 1.78787879, 1.90909091,
2.03030303, 2.15151515, 2.27272727, 2.39393939, 2.51515152,
2.63636364, 2.75757576, 2.87878788, 3. , 3.12121212,
3.24242424, 3.36363636, 3.48484848, 3.60606061, 3.72727273,
3.84848485, 3.96969697, 4.09090909, 4.21212121, 4.33333333,
4.45454545, 4.57575758, 4.6969697 , 4.81818182, 4.93939394,
5.06060606, 5.18181818, 5.3030303 , 5.42424242, 5.54545455,
5.66666667, 5.78787879, 5.90909091, 6.03030303, 6.15151515,
6.27272727, 6.39393939, 6.51515152, 6.63636364, 6.75757576,
6.87878788, 7. , 7.12121212, 7.24242424, 7.36363636,
7.48484848, 7.60606061, 7.72727273, 7.84848485, 7.96969697,
8.09090909, 8.21212121, 8.33333333, 8.45454545, 8.57575758,
8.6969697 , 8.81818182, 8.93939394, 9.06060606, 9.18181818,
9.3030303 , 9.42424242, 9.54545455, 9.66666667, 9.78787879,
9.90909091, 10.03030303, 10.15151515, 10.27272727, 10.39393939,
10.51515152, 10.63636364, 10.75757576, 10.87878788, 11. ])
In [19]:
X_ = x_new.reshape(-1,1)  # convert to a column vector
# Passing -1 to reshape() makes NumPy infer that dimension automatically.
X_.shape
Out[19]:
(100, 1)
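To make the -1 behavior concrete, a small standalone example (the array a here is hypothetical, not from the notebook):

a = np.arange(6)
print(a.reshape(-1, 1).shape)  # (6, 1): the -1 is inferred as 6
print(a.reshape(2, -1).shape)  # (2, 3): the -1 is inferred as 3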
In [25]:
from sklearn.metrics import mean_squared_error
error = np.sqrt(mean_squared_error(y,y_new))  # RMSE: square root of the mean squared error
# Note: y_new holds predictions at x_new, not at the training inputs x,
# so this RMSE compares values taken at different x positions.
print(error)  # smaller is better
9.299028215052264
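Since y_new holds predictions at x_new rather than at the training inputs, a sketch of the RMSE evaluated at matching positions might look like this (y_fit is a name introduced here for illustration):

y_fit = model.predict(X)                      # predictions at the original training inputs
rmse = np.sqrt(mean_squared_error(y, y_fit))  # targets and predictions now share the same x
print(rmse)                                   # should be small: y is nearly linear in x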
In [31]:
plt.scatter(x, y, label='input data')                         # blue dots: input data
plt.plot(X_new, y_new, color='red', label='regression line')  # red line: regression line
Out[31]:
[<matplotlib.lines.Line2D at 0x7f23c7325dc0>]
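Note that the label= arguments only take effect once a legend is drawn; a minimal completion of the plotting cell:

plt.scatter(x, y, label='input data')
plt.plot(X_new, y_new, color='red', label='regression line')
plt.legend()  # without this call, the labels above are never displayed
plt.show()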
In [44]:
from sklearn.datasets import load_wine  # load the wine dataset
data = load_wine()
type(data)
Out[44]:
sklearn.utils.Bunch
In [47]:
data.keys()
Out[47]:
dict_keys(['data', 'target', 'frame', 'target_names', 'DESCR', 'feature_names'])
In [41]:
data.data
Out[41]:
array([[1.423e+01, 1.710e+00, 2.430e+00, ..., 1.040e+00, 3.920e+00,
1.065e+03],
[1.320e+01, 1.780e+00, 2.140e+00, ..., 1.050e+00, 3.400e+00,
1.050e+03],
[1.316e+01, 2.360e+00, 2.670e+00, ..., 1.030e+00, 3.170e+00,
1.185e+03],
...,
[1.327e+01, 4.280e+00, 2.260e+00, ..., 5.900e-01, 1.560e+00,
8.350e+02],
[1.317e+01, 2.590e+00, 2.370e+00, ..., 6.000e-01, 1.620e+00,
8.400e+02],
[1.413e+01, 4.100e+00, 2.740e+00, ..., 6.100e-01, 1.600e+00,
5.600e+02]])
In [50]:
data.target.shape
Out[50]:
(178,)
In [51]:
data.data.shape
Out[51]:
(178, 13)
In [48]:
data.data.ndim
Out[48]:
2
In [49]:
data.target
Out[49]:
array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2])
In [54]:
print(data.feature_names)
len(data.feature_names)
['alcohol', 'malic_acid', 'ash', 'alcalinity_of_ash', 'magnesium', 'total_phenols', 'flavanoids', 'nonflavanoid_phenols', 'proanthocyanins', 'color_intensity', 'hue', 'od280/od315_of_diluted_wines', 'proline']
Out[54]:
13
In [55]:
data.target_names
Out[55]:
array(['class_0', 'class_1', 'class_2'], dtype='<U7')
In [56]:
print(data.DESCR)
.. _wine_dataset:
Wine recognition dataset
------------------------
**Data Set Characteristics:**
:Number of Instances: 178 (50 in each of three classes)
:Number of Attributes: 13 numeric, predictive attributes and the class
:Attribute Information:
- Alcohol
- Malic acid
- Ash
- Alcalinity of ash
- Magnesium
- Total phenols
- Flavanoids
- Nonflavanoid phenols
- Proanthocyanins
- Color intensity
- Hue
- OD280/OD315 of diluted wines
- Proline
- class:
- class_0
- class_1
- class_2
:Summary Statistics:
============================= ==== ===== ======= =====
Min Max Mean SD
============================= ==== ===== ======= =====
Alcohol: 11.0 14.8 13.0 0.8
Malic Acid: 0.74 5.80 2.34 1.12
Ash: 1.36 3.23 2.36 0.27
Alcalinity of Ash: 10.6 30.0 19.5 3.3
Magnesium: 70.0 162.0 99.7 14.3
Total Phenols: 0.98 3.88 2.29 0.63
Flavanoids: 0.34 5.08 2.03 1.00
Nonflavanoid Phenols: 0.13 0.66 0.36 0.12
Proanthocyanins: 0.41 3.58 1.59 0.57
Colour Intensity: 1.3 13.0 5.1 2.3
Hue: 0.48 1.71 0.96 0.23
OD280/OD315 of diluted wines: 1.27 4.00 2.61 0.71
Proline: 278 1680 746 315
============================= ==== ===== ======= =====
:Missing Attribute Values: None
:Class Distribution: class_0 (59), class_1 (71), class_2 (48)
:Creator: R.A. Fisher
:Donor: Michael Marshall (MARSHALL%PLU@io.arc.nasa.gov)
:Date: July, 1988
This is a copy of UCI ML Wine recognition datasets.
https://archive.ics.uci.edu/ml/machine-learning-databases/wine/wine.data
The data is the results of a chemical analysis of wines grown in the same
region in Italy by three different cultivators. There are thirteen different
measurements taken for different constituents found in the three types of
wine.
Original Owners:
Forina, M. et al, PARVUS -
An Extendible Package for Data Exploration, Classification and Correlation.
Institute of Pharmaceutical and Food Analysis and Technologies,
Via Brigata Salerno, 16147 Genoa, Italy.
Citation:
Lichman, M. (2013). UCI Machine Learning Repository
[https://archive.ics.uci.edu/ml]. Irvine, CA: University of California,
School of Information and Computer Science.
.. topic:: References
(1) S. Aeberhard, D. Coomans and O. de Vel,
Comparison of Classifiers in High Dimensional Settings,
Tech. Rep. no. 92-02, (1992), Dept. of Computer Science and Dept. of
Mathematics and Statistics, James Cook University of North Queensland.
(Also submitted to Technometrics).
The data was used with many others for comparing various
classifiers. The classes are separable, though only RDA
has achieved 100% correct classification.
(RDA : 100%, QDA 99.4%, LDA 98.9%, 1NN 96.1% (z-transformed data))
(All results using the leave-one-out technique)
(2) S. Aeberhard, D. Coomans and O. de Vel,
"THE CLASSIFICATION PERFORMANCE OF RDA"
Tech. Rep. no. 92-01, (1992), Dept. of Computer Science and Dept. of
Mathematics and Statistics, James Cook University of North Queensland.
(Also submitted to Journal of Chemometrics).
In [57]:
import pandas as pd
pd.DataFrame(data.data, columns=data.feature_names)
Out[57]:
| | alcohol | malic_acid | ash | alcalinity_of_ash | magnesium | total_phenols | flavanoids | nonflavanoid_phenols | proanthocyanins | color_intensity | hue | od280/od315_of_diluted_wines | proline |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 14.23 | 1.71 | 2.43 | 15.6 | 127.0 | 2.80 | 3.06 | 0.28 | 2.29 | 5.64 | 1.04 | 3.92 | 1065.0 |
| 1 | 13.20 | 1.78 | 2.14 | 11.2 | 100.0 | 2.65 | 2.76 | 0.26 | 1.28 | 4.38 | 1.05 | 3.40 | 1050.0 |
| 2 | 13.16 | 2.36 | 2.67 | 18.6 | 101.0 | 2.80 | 3.24 | 0.30 | 2.81 | 5.68 | 1.03 | 3.17 | 1185.0 |
| 3 | 14.37 | 1.95 | 2.50 | 16.8 | 113.0 | 3.85 | 3.49 | 0.24 | 2.18 | 7.80 | 0.86 | 3.45 | 1480.0 |
| 4 | 13.24 | 2.59 | 2.87 | 21.0 | 118.0 | 2.80 | 2.69 | 0.39 | 1.82 | 4.32 | 1.04 | 2.93 | 735.0 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 173 | 13.71 | 5.65 | 2.45 | 20.5 | 95.0 | 1.68 | 0.61 | 0.52 | 1.06 | 7.70 | 0.64 | 1.74 | 740.0 |
| 174 | 13.40 | 3.91 | 2.48 | 23.0 | 102.0 | 1.80 | 0.75 | 0.43 | 1.41 | 7.30 | 0.70 | 1.56 | 750.0 |
| 175 | 13.27 | 4.28 | 2.26 | 20.0 | 120.0 | 1.59 | 0.69 | 0.43 | 1.35 | 10.20 | 0.59 | 1.56 | 835.0 |
| 176 | 13.17 | 2.59 | 2.37 | 20.0 | 120.0 | 1.65 | 0.68 | 0.53 | 1.46 | 9.30 | 0.60 | 1.62 | 840.0 |
| 177 | 14.13 | 4.10 | 2.74 | 24.5 | 96.0 | 2.05 | 0.76 | 0.56 | 1.35 | 9.20 | 0.61 | 1.60 | 560.0 |

178 rows × 13 columns
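As a convenience (an addition here, not shown in the original), the target can be attached as an extra column so that features and labels live in a single DataFrame:

df = pd.DataFrame(data.data, columns=data.feature_names)
df['target'] = data.target  # append the class labels as a 14th column
print(df.shape)             # (178, 14)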
In [58]:
X = data.data
y = data.target  # assign features and labels
In [60]:
from sklearn.ensemble import RandomForestClassifier
model = RandomForestClassifier()  # create the model
In [61]:
model.fit(X, y)  # train the model
Out[61]:
RandomForestClassifier()
In [63]:
y_pred = model.predict(X)  # model outputs (predictions) via the predict API
In [65]:
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report
# Pass the target vector (i.e., the labels) y and the predictions y_pred as arguments.
print(classification_report(y, y_pred))
# Print the accuracy.
print("accuracy = ", accuracy_score(y, y_pred))
# Accuracy came out at 100%...?
# Why? Because the data was never split: the model is being
# evaluated on the very samples it was trained on.
precision recall f1-score support
0 1.00 1.00 1.00 59
1 1.00 1.00 1.00 71
2 1.00 1.00 1.00 48
accuracy 1.00 178
macro avg 1.00 1.00 1.00 178
weighted avg 1.00 1.00 1.00 178
accuracy = 1.0
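For intuition, accuracy is simply the fraction of matching labels, so it can be verified by hand:

print((y == y_pred).mean())  # same as accuracy_score(y, y_pred); 1.0 here, since we predicted on the training set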
In [72]:
from sklearn.model_selection import train_test_split
result = train_test_split(X, y, test_size=0.2, random_state=42)
print(result)       # a list of 4 arrays: X_train, X_test, y_train, y_test
print(type(result))
print(len(result))  # 4
[array([[1.434e+01, 1.680e+00, 2.700e+00, ..., 5.700e-01, 1.960e+00,
6.600e+02],
[1.253e+01, 5.510e+00, 2.640e+00, ..., 8.200e-01, 1.690e+00,
5.150e+02],
[1.237e+01, 1.070e+00, 2.100e+00, ..., 1.040e+00, 2.770e+00,
6.600e+02],
...,
[1.438e+01, 1.870e+00, 2.380e+00, ..., 1.200e+00, 3.000e+00,
1.547e+03],
[1.269e+01, 1.530e+00, 2.260e+00, ..., 9.600e-01, 2.060e+00,
4.950e+02],
[1.234e+01, 2.450e+00, 2.460e+00, ..., 8.000e-01, 3.380e+00,
4.380e+02]]), array([[1.364000e+01, 3.100000e+00, 2.560000e+00, 1.520000e+01,
1.160000e+02, 2.700000e+00, 3.030000e+00, 1.700000e-01,
1.660000e+00, 5.100000e+00, 9.600000e-01, 3.360000e+00,
8.450000e+02],
[1.421000e+01, 4.040000e+00, 2.440000e+00, 1.890000e+01,
1.110000e+02, 2.850000e+00, 2.650000e+00, 3.000000e-01,
1.250000e+00, 5.240000e+00, 8.700000e-01, 3.330000e+00,
1.080000e+03],
[1.293000e+01, 2.810000e+00, 2.700000e+00, 2.100000e+01,
9.600000e+01, 1.540000e+00, 5.000000e-01, 5.300000e-01,
7.500000e-01, 4.600000e+00, 7.700000e-01, 2.310000e+00,
6.000000e+02],
[1.373000e+01, 1.500000e+00, 2.700000e+00, 2.250000e+01,
1.010000e+02, 3.000000e+00, 3.250000e+00, 2.900000e-01,
2.380000e+00, 5.700000e+00, 1.190000e+00, 2.710000e+00,
1.285000e+03],
[1.237000e+01, 1.170000e+00, 1.920000e+00, 1.960000e+01,
7.800000e+01, 2.110000e+00, 2.000000e+00, 2.700000e-01,
1.040000e+00, 4.680000e+00, 1.120000e+00, 3.480000e+00,
5.100000e+02],
[1.430000e+01, 1.920000e+00, 2.720000e+00, 2.000000e+01,
1.200000e+02, 2.800000e+00, 3.140000e+00, 3.300000e-01,
1.970000e+00, 6.200000e+00, 1.070000e+00, 2.650000e+00,
1.280000e+03],
[1.200000e+01, 3.430000e+00, 2.000000e+00, 1.900000e+01,
8.700000e+01, 2.000000e+00, 1.640000e+00, 3.700000e-01,
1.870000e+00, 1.280000e+00, 9.300000e-01, 3.050000e+00,
5.640000e+02],
[1.340000e+01, 3.910000e+00, 2.480000e+00, 2.300000e+01,
1.020000e+02, 1.800000e+00, 7.500000e-01, 4.300000e-01,
1.410000e+00, 7.300000e+00, 7.000000e-01, 1.560000e+00,
7.500000e+02],
[1.161000e+01, 1.350000e+00, 2.700000e+00, 2.000000e+01,
9.400000e+01, 2.740000e+00, 2.920000e+00, 2.900000e-01,
2.490000e+00, 2.650000e+00, 9.600000e-01, 3.260000e+00,
6.800000e+02],
[1.336000e+01, 2.560000e+00, 2.350000e+00, 2.000000e+01,
8.900000e+01, 1.400000e+00, 5.000000e-01, 3.700000e-01,
6.400000e-01, 5.600000e+00, 7.000000e-01, 2.470000e+00,
7.800000e+02],
[1.350000e+01, 1.810000e+00, 2.610000e+00, 2.000000e+01,
9.600000e+01, 2.530000e+00, 2.610000e+00, 2.800000e-01,
1.660000e+00, 3.520000e+00, 1.120000e+00, 3.820000e+00,
8.450000e+02],
[1.350000e+01, 3.120000e+00, 2.620000e+00, 2.400000e+01,
1.230000e+02, 1.400000e+00, 1.570000e+00, 2.200000e-01,
1.250000e+00, 8.600000e+00, 5.900000e-01, 1.300000e+00,
5.000000e+02],
[1.341000e+01, 3.840000e+00, 2.120000e+00, 1.880000e+01,
9.000000e+01, 2.450000e+00, 2.680000e+00, 2.700000e-01,
1.480000e+00, 4.280000e+00, 9.100000e-01, 3.000000e+00,
1.035000e+03],
[1.277000e+01, 3.430000e+00, 1.980000e+00, 1.600000e+01,
8.000000e+01, 1.630000e+00, 1.250000e+00, 4.300000e-01,
8.300000e-01, 3.400000e+00, 7.000000e-01, 2.120000e+00,
3.720000e+02],
[1.363000e+01, 1.810000e+00, 2.700000e+00, 1.720000e+01,
1.120000e+02, 2.850000e+00, 2.910000e+00, 3.000000e-01,
1.460000e+00, 7.300000e+00, 1.280000e+00, 2.880000e+00,
1.310000e+03],
[1.252000e+01, 2.430000e+00, 2.170000e+00, 2.100000e+01,
8.800000e+01, 2.550000e+00, 2.270000e+00, 2.600000e-01,
1.220000e+00, 2.000000e+00, 9.000000e-01, 2.780000e+00,
3.250000e+02],
[1.141000e+01, 7.400000e-01, 2.500000e+00, 2.100000e+01,
8.800000e+01, 2.480000e+00, 2.010000e+00, 4.200000e-01,
1.440000e+00, 3.080000e+00, 1.100000e+00, 2.310000e+00,
4.340000e+02],
[1.208000e+01, 1.130000e+00, 2.510000e+00, 2.400000e+01,
7.800000e+01, 2.000000e+00, 1.580000e+00, 4.000000e-01,
1.400000e+00, 2.200000e+00, 1.310000e+00, 2.720000e+00,
6.300000e+02],
[1.386000e+01, 1.350000e+00, 2.270000e+00, 1.600000e+01,
9.800000e+01, 2.980000e+00, 3.150000e+00, 2.200000e-01,
1.850000e+00, 7.220000e+00, 1.010000e+00, 3.550000e+00,
1.045000e+03],
[1.208000e+01, 1.390000e+00, 2.500000e+00, 2.250000e+01,
8.400000e+01, 2.560000e+00, 2.290000e+00, 4.300000e-01,
1.040000e+00, 2.900000e+00, 9.300000e-01, 3.190000e+00,
3.850000e+02],
[1.419000e+01, 1.590000e+00, 2.480000e+00, 1.650000e+01,
1.080000e+02, 3.300000e+00, 3.930000e+00, 3.200000e-01,
1.860000e+00, 8.700000e+00, 1.230000e+00, 2.820000e+00,
1.680000e+03],
[1.311000e+01, 1.010000e+00, 1.700000e+00, 1.500000e+01,
7.800000e+01, 2.980000e+00, 3.180000e+00, 2.600000e-01,
2.280000e+00, 5.300000e+00, 1.120000e+00, 3.180000e+00,
5.020000e+02],
[1.233000e+01, 1.100000e+00, 2.280000e+00, 1.600000e+01,
1.010000e+02, 2.050000e+00, 1.090000e+00, 6.300000e-01,
4.100000e-01, 3.270000e+00, 1.250000e+00, 1.670000e+00,
6.800000e+02],
[1.340000e+01, 4.600000e+00, 2.860000e+00, 2.500000e+01,
1.120000e+02, 1.980000e+00, 9.600000e-01, 2.700000e-01,
1.110000e+00, 8.500000e+00, 6.700000e-01, 1.920000e+00,
6.300000e+02],
[1.277000e+01, 2.390000e+00, 2.280000e+00, 1.950000e+01,
8.600000e+01, 1.390000e+00, 5.100000e-01, 4.800000e-01,
6.400000e-01, 9.899999e+00, 5.700000e-01, 1.630000e+00,
4.700000e+02],
[1.378000e+01, 2.760000e+00, 2.300000e+00, 2.200000e+01,
9.000000e+01, 1.350000e+00, 6.800000e-01, 4.100000e-01,
1.030000e+00, 9.580000e+00, 7.000000e-01, 1.680000e+00,
6.150000e+02],
[1.242000e+01, 1.610000e+00, 2.190000e+00, 2.250000e+01,
1.080000e+02, 2.000000e+00, 2.090000e+00, 3.400000e-01,
1.610000e+00, 2.060000e+00, 1.060000e+00, 2.960000e+00,
3.450000e+02],
[1.237000e+01, 1.210000e+00, 2.560000e+00, 1.810000e+01,
9.800000e+01, 2.420000e+00, 2.650000e+00, 3.700000e-01,
2.080000e+00, 4.600000e+00, 1.190000e+00, 2.300000e+00,
6.780000e+02],
[1.208000e+01, 1.830000e+00, 2.320000e+00, 1.850000e+01,
8.100000e+01, 1.600000e+00, 1.500000e+00, 5.200000e-01,
1.640000e+00, 2.400000e+00, 1.080000e+00, 2.270000e+00,
4.800000e+02],
[1.356000e+01, 1.730000e+00, 2.460000e+00, 2.050000e+01,
1.160000e+02, 2.960000e+00, 2.780000e+00, 2.000000e-01,
2.450000e+00, 6.250000e+00, 9.800000e-01, 3.030000e+00,
1.120000e+03],
[1.402000e+01, 1.680000e+00, 2.210000e+00, 1.600000e+01,
9.600000e+01, 2.650000e+00, 2.330000e+00, 2.600000e-01,
1.980000e+00, 4.700000e+00, 1.040000e+00, 3.590000e+00,
1.035000e+03],
[1.237000e+01, 1.630000e+00, 2.300000e+00, 2.450000e+01,
8.800000e+01, 2.220000e+00, 2.450000e+00, 4.000000e-01,
1.900000e+00, 2.120000e+00, 8.900000e-01, 2.780000e+00,
3.420000e+02],
[1.316000e+01, 3.570000e+00, 2.150000e+00, 2.100000e+01,
1.020000e+02, 1.500000e+00, 5.500000e-01, 4.300000e-01,
1.300000e+00, 4.000000e+00, 6.000000e-01, 1.680000e+00,
8.300000e+02],
[1.358000e+01, 1.660000e+00, 2.360000e+00, 1.910000e+01,
1.060000e+02, 2.860000e+00, 3.190000e+00, 2.200000e-01,
1.950000e+00, 6.900000e+00, 1.090000e+00, 2.880000e+00,
1.515000e+03],
[1.375000e+01, 1.730000e+00, 2.410000e+00, 1.600000e+01,
8.900000e+01, 2.600000e+00, 2.760000e+00, 2.900000e-01,
1.810000e+00, 5.600000e+00, 1.150000e+00, 2.900000e+00,
1.320000e+03],
[1.388000e+01, 1.890000e+00, 2.590000e+00, 1.500000e+01,
1.010000e+02, 3.250000e+00, 3.560000e+00, 1.700000e-01,
1.700000e+00, 5.430000e+00, 8.800000e-01, 3.560000e+00,
1.095000e+03]]), array([2, 2, 1, 2, 0, 1, 1, 1, 2, 0, 1, 1, 2, 0, 1, 0, 0, 2, 2, 1, 1, 0,
1, 0, 2, 1, 1, 2, 0, 0, 0, 2, 0, 0, 1, 2, 1, 0, 2, 1, 0, 2, 1, 1,
0, 1, 0, 0, 1, 0, 0, 2, 1, 1, 1, 0, 1, 1, 1, 2, 2, 0, 1, 2, 2, 1,
1, 0, 1, 2, 2, 1, 2, 1, 1, 1, 0, 0, 2, 0, 2, 0, 0, 1, 1, 0, 0, 0,
1, 0, 1, 2, 1, 1, 1, 2, 2, 1, 0, 0, 1, 2, 2, 0, 1, 2, 2, 2, 2, 1,
0, 1, 0, 2, 0, 0, 1, 0, 0, 2, 1, 0, 2, 2, 0, 0, 2, 2, 2, 1, 1, 1,
1, 1, 1, 2, 0, 1, 1, 0, 1, 1]), array([0, 0, 2, 0, 1, 0, 1, 2, 1, 2, 0, 2, 0, 1, 0, 1, 1, 1, 0, 1, 0, 1,
1, 2, 2, 2, 1, 1, 1, 0, 0, 1, 2, 0, 0, 0])]
<class 'list'>
4
In [74]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
print(X_train)
[[1.434e+01 1.680e+00 2.700e+00 ... 5.700e-01 1.960e+00 6.600e+02]
[1.253e+01 5.510e+00 2.640e+00 ... 8.200e-01 1.690e+00 5.150e+02]
[1.237e+01 1.070e+00 2.100e+00 ... 1.040e+00 2.770e+00 6.600e+02]
...
[1.438e+01 1.870e+00 2.380e+00 ... 1.200e+00 3.000e+00 1.547e+03]
[1.269e+01 1.530e+00 2.260e+00 ... 9.600e-01 2.060e+00 4.950e+02]
[1.234e+01 2.450e+00 2.460e+00 ... 8.000e-01 3.380e+00 4.380e+02]]
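A minimal sketch of the step this split enables, reusing the RandomForestClassifier from above: fit on the training portion only, then score on the held-out test set (the exact score will vary with the seed):

model = RandomForestClassifier(random_state=42)  # fixed seed for reproducibility
model.fit(X_train, y_train)                      # train on the 80% split only
y_test_pred = model.predict(X_test)              # predict on unseen samples
print("test accuracy =", accuracy_score(y_test, y_test_pred))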