In [1]:
import numpy as np
import matplotlib.pyplot as plt
r = np.random.RandomState(10)
x = 10 * r.rand(100)
y = 2 * x - 3 * r.rand(100)
plt.scatter(x,y)
Out[1]:
<matplotlib.collections.PathCollection at 0x7f23e7857850>
In [6]:
x  # 1-D array of shape (100,)
x.shape
Out[6]:
array([7.71320643, 0.20751949, 6.33648235, 7.48803883, 4.98507012,
2.24796646, 1.98062865, 7.60530712, 1.69110837, 0.88339814,
6.85359818, 9.53393346, 0.03948266, 5.12192263, 8.12620962,
6.12526067, 7.21755317, 2.91876068, 9.17774123, 7.14575783,
5.42544368, 1.42170048, 3.7334076 , 6.74133615, 4.41833174,
4.34013993, 6.17766978, 5.13138243, 6.50397182, 6.01038953,
8.05223197, 5.21647152, 9.08648881, 3.19236089, 0.90459349,
3.00700057, 1.13984362, 8.28681326, 0.46896319, 6.26287148,
5.47586156, 8.19286996, 1.9894754 , 8.56850302, 3.51652639,
7.54647692, 2.95961707, 8.8393648 , 3.25511638, 1.65015898,
3.92529244, 0.93460375, 8.21105658, 1.5115202 , 3.84114449,
9.44260712, 9.87625475, 4.56304547, 8.26122844, 2.51374134,
5.97371648, 9.0283176 , 5.34557949, 5.90201363, 0.39281767,
3.57181759, 0.7961309 , 3.05459918, 3.30719312, 7.73830296,
0.39959209, 4.29492178, 3.14926872, 6.36491143, 3.4634715 ,
0.43097356, 8.79915175, 7.63240587, 8.78096643, 4.17509144,
6.05577564, 5.13466627, 5.97836648, 2.62215661, 3.00871309,
0.25399782, 3.03062561, 2.42075875, 5.57578189, 5.6550702 ,
4.75132247, 2.92797976, 0.64251061, 9.78819146, 3.39707844,
4.95048631, 9.77080726, 4.40773825, 3.18272805, 5.19796986])
In [3]:
y.shape
Out[3]:
(100,)
In [5]:
from sklearn.linear_model import LinearRegression
model = LinearRegression()
model
Out[5]:
LinearRegression()
In [7]:
# ! Error: x must be passed as a (100, 1) matrix
model.fit(x, y)
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
/tmp/ipykernel_296/352589828.py in <module>
1 # ! Error occurs
----> 2 model.fit(x, y)
/opt/conda/lib/python3.9/site-packages/sklearn/linear_model/_base.py in fit(self, X, y, sample_weight)
659 accept_sparse = False if self.positive else ["csr", "csc", "coo"]
660
--> 661 X, y = self._validate_data(
662 X, y, accept_sparse=accept_sparse, y_numeric=True, multi_output=True
663 )
/opt/conda/lib/python3.9/site-packages/sklearn/base.py in _validate_data(self, X, y, reset, validate_separately, **check_params)
570 y = check_array(y, **check_y_params)
571 else:
--> 572 X, y = check_X_y(X, y, **check_params)
573 out = X, y
574
/opt/conda/lib/python3.9/site-packages/sklearn/utils/validation.py in check_X_y(X, y, accept_sparse, accept_large_sparse, dtype, order, copy, force_all_finite, ensure_2d, allow_nd, multi_output, ensure_min_samples, ensure_min_features, y_numeric, estimator)
954 raise ValueError("y cannot be None")
955
--> 956 X = check_array(
957 X,
958 accept_sparse=accept_sparse,
/opt/conda/lib/python3.9/site-packages/sklearn/utils/validation.py in check_array(array, accept_sparse, accept_large_sparse, dtype, order, copy, force_all_finite, ensure_2d, allow_nd, ensure_min_samples, ensure_min_features, estimator)
759 # If input is 1D raise error
760 if array.ndim == 1:
--> 761 raise ValueError(
762 "Expected 2D array, got 1D array instead:\narray={}.\n"
763 "Reshape your data either using array.reshape(-1, 1) if "
ValueError: Expected 2D array, got 1D array instead:
array=[7.71320643 0.20751949 6.33648235 7.48803883 4.98507012 2.24796646
1.98062865 7.60530712 1.69110837 0.88339814 6.85359818 9.53393346
0.03948266 5.12192263 8.12620962 6.12526067 7.21755317 2.91876068
9.17774123 7.14575783 5.42544368 1.42170048 3.7334076 6.74133615
4.41833174 4.34013993 6.17766978 5.13138243 6.50397182 6.01038953
8.05223197 5.21647152 9.08648881 3.19236089 0.90459349 3.00700057
1.13984362 8.28681326 0.46896319 6.26287148 5.47586156 8.19286996
1.9894754 8.56850302 3.51652639 7.54647692 2.95961707 8.8393648
3.25511638 1.65015898 3.92529244 0.93460375 8.21105658 1.5115202
3.84114449 9.44260712 9.87625475 4.56304547 8.26122844 2.51374134
5.97371648 9.0283176 5.34557949 5.90201363 0.39281767 3.57181759
0.7961309 3.05459918 3.30719312 7.73830296 0.39959209 4.29492178
3.14926872 6.36491143 3.4634715 0.43097356 8.79915175 7.63240587
8.78096643 4.17509144 6.05577564 5.13466627 5.97836648 2.62215661
3.00871309 0.25399782 3.03062561 2.42075875 5.57578189 5.6550702
4.75132247 2.92797976 0.64251061 9.78819146 3.39707844 4.95048631
9.77080726 4.40773825 3.18272805 5.19796986].
Reshape your data either using array.reshape(-1, 1) if your data has a single feature or array.reshape(1, -1) if it contains a single sample.
In [11]:
X = x.reshape(100,1)  # reshape x into a (100, 1) column vector
X
Out[11]:
array([[7.71320643],
[0.20751949],
[6.33648235],
[7.48803883],
[4.98507012],
[2.24796646],
[1.98062865],
[7.60530712],
[1.69110837],
[0.88339814],
[6.85359818],
[9.53393346],
[0.03948266],
[5.12192263],
[8.12620962],
[6.12526067],
[7.21755317],
[2.91876068],
[9.17774123],
[7.14575783],
[5.42544368],
[1.42170048],
[3.7334076 ],
[6.74133615],
[4.41833174],
[4.34013993],
[6.17766978],
[5.13138243],
[6.50397182],
[6.01038953],
[8.05223197],
[5.21647152],
[9.08648881],
[3.19236089],
[0.90459349],
[3.00700057],
[1.13984362],
[8.28681326],
[0.46896319],
[6.26287148],
[5.47586156],
[8.19286996],
[1.9894754 ],
[8.56850302],
[3.51652639],
[7.54647692],
[2.95961707],
[8.8393648 ],
[3.25511638],
[1.65015898],
[3.92529244],
[0.93460375],
[8.21105658],
[1.5115202 ],
[3.84114449],
[9.44260712],
[9.87625475],
[4.56304547],
[8.26122844],
[2.51374134],
[5.97371648],
[9.0283176 ],
[5.34557949],
[5.90201363],
[0.39281767],
[3.57181759],
[0.7961309 ],
[3.05459918],
[3.30719312],
[7.73830296],
[0.39959209],
[4.29492178],
[3.14926872],
[6.36491143],
[3.4634715 ],
[0.43097356],
[8.79915175],
[7.63240587],
[8.78096643],
[4.17509144],
[6.05577564],
[5.13466627],
[5.97836648],
[2.62215661],
[3.00871309],
[0.25399782],
[3.03062561],
[2.42075875],
[5.57578189],
[5.6550702 ],
[4.75132247],
[2.92797976],
[0.64251061],
[9.78819146],
[3.39707844],
[4.95048631],
[9.77080726],
[4.40773825],
[3.18272805],
[5.19796986]])
In [12]:
model.fit(X,y)  # fit with X of shape (100, 1) and y of shape (100,)
Out[12]:
LinearRegression()
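Since the data were generated as y = 2 * x - 3 * r.rand(100), the fitted parameters should roughly recover that rule. A quick sanity check (an editor's sketch, not part of the original run):

print(model.coef_)       # slope: close to 2
print(model.intercept_)  # intercept: close to -1.5, since 3 * rand has mean 1.5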
In [32]:
# Build the regression line
x_new = np.linspace(-1, 11, 100)  # generate new inputs: (start, stop, number of points)
X_new = x_new.reshape(100,1)      # reshape the new inputs to a column vector
y_new = model.predict(X_new)      # model predictions for the new inputs
x_new
Out[32]:
array([-1. , -0.87878788, -0.75757576, -0.63636364, -0.51515152,
-0.39393939, -0.27272727, -0.15151515, -0.03030303, 0.09090909,
0.21212121, 0.33333333, 0.45454545, 0.57575758, 0.6969697 ,
0.81818182, 0.93939394, 1.06060606, 1.18181818, 1.3030303 ,
1.42424242, 1.54545455, 1.66666667, 1.78787879, 1.90909091,
2.03030303, 2.15151515, 2.27272727, 2.39393939, 2.51515152,
2.63636364, 2.75757576, 2.87878788, 3. , 3.12121212,
3.24242424, 3.36363636, 3.48484848, 3.60606061, 3.72727273,
3.84848485, 3.96969697, 4.09090909, 4.21212121, 4.33333333,
4.45454545, 4.57575758, 4.6969697 , 4.81818182, 4.93939394,
5.06060606, 5.18181818, 5.3030303 , 5.42424242, 5.54545455,
5.66666667, 5.78787879, 5.90909091, 6.03030303, 6.15151515,
6.27272727, 6.39393939, 6.51515152, 6.63636364, 6.75757576,
6.87878788, 7. , 7.12121212, 7.24242424, 7.36363636,
7.48484848, 7.60606061, 7.72727273, 7.84848485, 7.96969697,
8.09090909, 8.21212121, 8.33333333, 8.45454545, 8.57575758,
8.6969697 , 8.81818182, 8.93939394, 9.06060606, 9.18181818,
9.3030303 , 9.42424242, 9.54545455, 9.66666667, 9.78787879,
9.90909091, 10.03030303, 10.15151515, 10.27272727, 10.39393939,
10.51515152, 10.63636364, 10.75757576, 10.87878788, 11. ])
In [19]:
X_ = x_new.reshape(-1,1)  # convert to a column vector
# Passing -1 to reshape() makes NumPy infer that dimension automatically.
X_.shape
Out[19]:
(100, 1)
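To make the -1 behavior concrete, a small standalone example (the array a here is hypothetical, not from the notebook):

a = np.arange(6)
print(a.reshape(-1, 1).shape)  # (6, 1): the -1 is inferred as 6
print(a.reshape(2, -1).shape)  # (2, 3): the -1 is inferred as 3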
In [25]:
from sklearn.metrics import mean_squared_error
error = np.sqrt(mean_squared_error(y,y_new))  # RMSE: square root of the mean squared error
# Note: y_new holds predictions at x_new, not at the training inputs x,
# so this RMSE compares values taken at different x positions.
print(error)  # smaller is better
9.299028215052264
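Since y_new holds predictions at x_new rather than at the training inputs, a sketch of the RMSE evaluated at matching positions might look like this (y_fit is a name introduced here for illustration):

y_fit = model.predict(X)                      # predictions at the original training inputs
rmse = np.sqrt(mean_squared_error(y, y_fit))  # targets and predictions now share the same x
print(rmse)                                   # should be small: y is nearly linear in x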
In [31]:
plt.scatter(x, y, label='input data')                         # blue dots: input data
plt.plot(X_new, y_new, color='red', label='regression line')  # red line: regression line
Out[31]:
[<matplotlib.lines.Line2D at 0x7f23c7325dc0>]
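Note that the label= arguments only take effect once a legend is drawn; a minimal completion of the plotting cell:

plt.scatter(x, y, label='input data')
plt.plot(X_new, y_new, color='red', label='regression line')
plt.legend()  # without this call, the labels above are never displayed
plt.show()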
In [44]:
from sklearn.datasets import load_wine  # load the wine dataset
data = load_wine()
type(data)
Out[44]:
sklearn.utils.Bunch
In [47]:
data.keys()
Out[47]:
dict_keys(['data', 'target', 'frame', 'target_names', 'DESCR', 'feature_names'])
In [41]:
data.data
Out[41]:
array([[1.423e+01, 1.710e+00, 2.430e+00, ..., 1.040e+00, 3.920e+00,
1.065e+03],
[1.320e+01, 1.780e+00, 2.140e+00, ..., 1.050e+00, 3.400e+00,
1.050e+03],
[1.316e+01, 2.360e+00, 2.670e+00, ..., 1.030e+00, 3.170e+00,
1.185e+03],
...,
[1.327e+01, 4.280e+00, 2.260e+00, ..., 5.900e-01, 1.560e+00,
8.350e+02],
[1.317e+01, 2.590e+00, 2.370e+00, ..., 6.000e-01, 1.620e+00,
8.400e+02],
[1.413e+01, 4.100e+00, 2.740e+00, ..., 6.100e-01, 1.600e+00,
5.600e+02]])
In [50]:
data.target.shape
Out[50]:
(178,)
In [51]:
data.data.shape
Out[51]:
(178, 13)
In [48]:
data.data.ndim
Out[48]:
2
In [49]:
data.target
Out[49]:
array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2])
In [54]:
print(data.feature_names)
len(data.feature_names)
['alcohol', 'malic_acid', 'ash', 'alcalinity_of_ash', 'magnesium', 'total_phenols', 'flavanoids', 'nonflavanoid_phenols', 'proanthocyanins', 'color_intensity', 'hue', 'od280/od315_of_diluted_wines', 'proline']
Out[54]:
13
In [55]:
data.target_names
Out[55]:
array(['class_0', 'class_1', 'class_2'], dtype='<U7')
In [56]:
print(data.DESCR)
.. _wine_dataset:
Wine recognition dataset
------------------------
**Data Set Characteristics:**
:Number of Instances: 178 (50 in each of three classes)
:Number of Attributes: 13 numeric, predictive attributes and the class
:Attribute Information:
- Alcohol
- Malic acid
- Ash
- Alcalinity of ash
- Magnesium
- Total phenols
- Flavanoids
- Nonflavanoid phenols
- Proanthocyanins
- Color intensity
- Hue
- OD280/OD315 of diluted wines
- Proline
- class:
- class_0
- class_1
- class_2
:Summary Statistics:
============================= ==== ===== ======= =====
Min Max Mean SD
============================= ==== ===== ======= =====
Alcohol: 11.0 14.8 13.0 0.8
Malic Acid: 0.74 5.80 2.34 1.12
Ash: 1.36 3.23 2.36 0.27
Alcalinity of Ash: 10.6 30.0 19.5 3.3
Magnesium: 70.0 162.0 99.7 14.3
Total Phenols: 0.98 3.88 2.29 0.63
Flavanoids: 0.34 5.08 2.03 1.00
Nonflavanoid Phenols: 0.13 0.66 0.36 0.12
Proanthocyanins: 0.41 3.58 1.59 0.57
Colour Intensity: 1.3 13.0 5.1 2.3
Hue: 0.48 1.71 0.96 0.23
OD280/OD315 of diluted wines: 1.27 4.00 2.61 0.71
Proline: 278 1680 746 315
============================= ==== ===== ======= =====
:Missing Attribute Values: None
:Class Distribution: class_0 (59), class_1 (71), class_2 (48)
:Creator: R.A. Fisher
:Donor: Michael Marshall (MARSHALL%PLU@io.arc.nasa.gov)
:Date: July, 1988
This is a copy of UCI ML Wine recognition datasets.
https://archive.ics.uci.edu/ml/machine-learning-databases/wine/wine.data
The data is the results of a chemical analysis of wines grown in the same
region in Italy by three different cultivators. There are thirteen different
measurements taken for different constituents found in the three types of
wine.
Original Owners:
Forina, M. et al, PARVUS -
An Extendible Package for Data Exploration, Classification and Correlation.
Institute of Pharmaceutical and Food Analysis and Technologies,
Via Brigata Salerno, 16147 Genoa, Italy.
Citation:
Lichman, M. (2013). UCI Machine Learning Repository
[https://archive.ics.uci.edu/ml]. Irvine, CA: University of California,
School of Information and Computer Science.
.. topic:: References
(1) S. Aeberhard, D. Coomans and O. de Vel,
Comparison of Classifiers in High Dimensional Settings,
Tech. Rep. no. 92-02, (1992), Dept. of Computer Science and Dept. of
Mathematics and Statistics, James Cook University of North Queensland.
(Also submitted to Technometrics).
The data was used with many others for comparing various
classifiers. The classes are separable, though only RDA
has achieved 100% correct classification.
(RDA : 100%, QDA 99.4%, LDA 98.9%, 1NN 96.1% (z-transformed data))
(All results using the leave-one-out technique)
(2) S. Aeberhard, D. Coomans and O. de Vel,
"THE CLASSIFICATION PERFORMANCE OF RDA"
Tech. Rep. no. 92-01, (1992), Dept. of Computer Science and Dept. of
Mathematics and Statistics, James Cook University of North Queensland.
(Also submitted to Journal of Chemometrics).
In [57]:
import pandas as pd
pd.DataFrame(data.data, columns=data.feature_names)
Out[57]:
| | alcohol | malic_acid | ash | alcalinity_of_ash | magnesium | total_phenols | flavanoids | nonflavanoid_phenols | proanthocyanins | color_intensity | hue | od280/od315_of_diluted_wines | proline |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 14.23 | 1.71 | 2.43 | 15.6 | 127.0 | 2.80 | 3.06 | 0.28 | 2.29 | 5.64 | 1.04 | 3.92 | 1065.0 |
| 1 | 13.20 | 1.78 | 2.14 | 11.2 | 100.0 | 2.65 | 2.76 | 0.26 | 1.28 | 4.38 | 1.05 | 3.40 | 1050.0 |
| 2 | 13.16 | 2.36 | 2.67 | 18.6 | 101.0 | 2.80 | 3.24 | 0.30 | 2.81 | 5.68 | 1.03 | 3.17 | 1185.0 |
| 3 | 14.37 | 1.95 | 2.50 | 16.8 | 113.0 | 3.85 | 3.49 | 0.24 | 2.18 | 7.80 | 0.86 | 3.45 | 1480.0 |
| 4 | 13.24 | 2.59 | 2.87 | 21.0 | 118.0 | 2.80 | 2.69 | 0.39 | 1.82 | 4.32 | 1.04 | 2.93 | 735.0 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 173 | 13.71 | 5.65 | 2.45 | 20.5 | 95.0 | 1.68 | 0.61 | 0.52 | 1.06 | 7.70 | 0.64 | 1.74 | 740.0 |
| 174 | 13.40 | 3.91 | 2.48 | 23.0 | 102.0 | 1.80 | 0.75 | 0.43 | 1.41 | 7.30 | 0.70 | 1.56 | 750.0 |
| 175 | 13.27 | 4.28 | 2.26 | 20.0 | 120.0 | 1.59 | 0.69 | 0.43 | 1.35 | 10.20 | 0.59 | 1.56 | 835.0 |
| 176 | 13.17 | 2.59 | 2.37 | 20.0 | 120.0 | 1.65 | 0.68 | 0.53 | 1.46 | 9.30 | 0.60 | 1.62 | 840.0 |
| 177 | 14.13 | 4.10 | 2.74 | 24.5 | 96.0 | 2.05 | 0.76 | 0.56 | 1.35 | 9.20 | 0.61 | 1.60 | 560.0 |

178 rows × 13 columns
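As a convenience (an addition here, not shown in the original), the target can be attached as an extra column so that features and labels live in a single DataFrame:

df = pd.DataFrame(data.data, columns=data.feature_names)
df['target'] = data.target  # append the class labels as a 14th column
print(df.shape)             # (178, 14)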
In [58]:
X = data.data
y = data.target  # assign features and labels
In [60]:
from sklearn.ensemble import RandomForestClassifier
model = RandomForestClassifier()  # create the model
In [61]:
model.fit(X, y)  # train the model
Out[61]:
RandomForestClassifier()
In [63]:
y_pred = model.predict(X)  # model outputs (predictions) via the predict API
In [65]:
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report
# Pass the target vector (i.e., the labels) y and the predictions y_pred as arguments.
print(classification_report(y, y_pred))
# Print the accuracy.
print("accuracy = ", accuracy_score(y, y_pred))
# Accuracy came out at 100%...?
# Why? Because the data was never split: the model is being
# evaluated on the very samples it was trained on.
precision recall f1-score support
0 1.00 1.00 1.00 59
1 1.00 1.00 1.00 71
2 1.00 1.00 1.00 48
accuracy 1.00 178
macro avg 1.00 1.00 1.00 178
weighted avg 1.00 1.00 1.00 178
accuracy = 1.0
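For intuition, accuracy is simply the fraction of matching labels, so it can be verified by hand:

print((y == y_pred).mean())  # same as accuracy_score(y, y_pred); 1.0 here, since we predicted on the training set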
In [72]:
from sklearn.model_selection import train_test_split
result = train_test_split(X, y, test_size=0.2, random_state=42)
print(result)       # a list of 4 arrays: X_train, X_test, y_train, y_test
print(type(result))
print(len(result))  # 4
[array([[1.434e+01, 1.680e+00, 2.700e+00, ..., 5.700e-01, 1.960e+00,
6.600e+02],
[1.253e+01, 5.510e+00, 2.640e+00, ..., 8.200e-01, 1.690e+00,
5.150e+02],
[1.237e+01, 1.070e+00, 2.100e+00, ..., 1.040e+00, 2.770e+00,
6.600e+02],
...,
[1.438e+01, 1.870e+00, 2.380e+00, ..., 1.200e+00, 3.000e+00,
1.547e+03],
[1.269e+01, 1.530e+00, 2.260e+00, ..., 9.600e-01, 2.060e+00,
4.950e+02],
[1.234e+01, 2.450e+00, 2.460e+00, ..., 8.000e-01, 3.380e+00,
4.380e+02]]), array([[1.364000e+01, 3.100000e+00, 2.560000e+00, 1.520000e+01,
1.160000e+02, 2.700000e+00, 3.030000e+00, 1.700000e-01,
1.660000e+00, 5.100000e+00, 9.600000e-01, 3.360000e+00,
8.450000e+02],
[1.421000e+01, 4.040000e+00, 2.440000e+00, 1.890000e+01,
1.110000e+02, 2.850000e+00, 2.650000e+00, 3.000000e-01,
1.250000e+00, 5.240000e+00, 8.700000e-01, 3.330000e+00,
1.080000e+03],
[1.293000e+01, 2.810000e+00, 2.700000e+00, 2.100000e+01,
9.600000e+01, 1.540000e+00, 5.000000e-01, 5.300000e-01,
7.500000e-01, 4.600000e+00, 7.700000e-01, 2.310000e+00,
6.000000e+02],
[1.373000e+01, 1.500000e+00, 2.700000e+00, 2.250000e+01,
1.010000e+02, 3.000000e+00, 3.250000e+00, 2.900000e-01,
2.380000e+00, 5.700000e+00, 1.190000e+00, 2.710000e+00,
1.285000e+03],
[1.237000e+01, 1.170000e+00, 1.920000e+00, 1.960000e+01,
7.800000e+01, 2.110000e+00, 2.000000e+00, 2.700000e-01,
1.040000e+00, 4.680000e+00, 1.120000e+00, 3.480000e+00,
5.100000e+02],
[1.430000e+01, 1.920000e+00, 2.720000e+00, 2.000000e+01,
1.200000e+02, 2.800000e+00, 3.140000e+00, 3.300000e-01,
1.970000e+00, 6.200000e+00, 1.070000e+00, 2.650000e+00,
1.280000e+03],
[1.200000e+01, 3.430000e+00, 2.000000e+00, 1.900000e+01,
8.700000e+01, 2.000000e+00, 1.640000e+00, 3.700000e-01,
1.870000e+00, 1.280000e+00, 9.300000e-01, 3.050000e+00,
5.640000e+02],
[1.340000e+01, 3.910000e+00, 2.480000e+00, 2.300000e+01,
1.020000e+02, 1.800000e+00, 7.500000e-01, 4.300000e-01,
1.410000e+00, 7.300000e+00, 7.000000e-01, 1.560000e+00,
7.500000e+02],
[1.161000e+01, 1.350000e+00, 2.700000e+00, 2.000000e+01,
9.400000e+01, 2.740000e+00, 2.920000e+00, 2.900000e-01,
2.490000e+00, 2.650000e+00, 9.600000e-01, 3.260000e+00,
6.800000e+02],
[1.336000e+01, 2.560000e+00, 2.350000e+00, 2.000000e+01,
8.900000e+01, 1.400000e+00, 5.000000e-01, 3.700000e-01,
6.400000e-01, 5.600000e+00, 7.000000e-01, 2.470000e+00,
7.800000e+02],
[1.350000e+01, 1.810000e+00, 2.610000e+00, 2.000000e+01,
9.600000e+01, 2.530000e+00, 2.610000e+00, 2.800000e-01,
1.660000e+00, 3.520000e+00, 1.120000e+00, 3.820000e+00,
8.450000e+02],
[1.350000e+01, 3.120000e+00, 2.620000e+00, 2.400000e+01,
1.230000e+02, 1.400000e+00, 1.570000e+00, 2.200000e-01,
1.250000e+00, 8.600000e+00, 5.900000e-01, 1.300000e+00,
5.000000e+02],
[1.341000e+01, 3.840000e+00, 2.120000e+00, 1.880000e+01,
9.000000e+01, 2.450000e+00, 2.680000e+00, 2.700000e-01,
1.480000e+00, 4.280000e+00, 9.100000e-01, 3.000000e+00,
1.035000e+03],
[1.277000e+01, 3.430000e+00, 1.980000e+00, 1.600000e+01,
8.000000e+01, 1.630000e+00, 1.250000e+00, 4.300000e-01,
8.300000e-01, 3.400000e+00, 7.000000e-01, 2.120000e+00,
3.720000e+02],
[1.363000e+01, 1.810000e+00, 2.700000e+00, 1.720000e+01,
1.120000e+02, 2.850000e+00, 2.910000e+00, 3.000000e-01,
1.460000e+00, 7.300000e+00, 1.280000e+00, 2.880000e+00,
1.310000e+03],
[1.252000e+01, 2.430000e+00, 2.170000e+00, 2.100000e+01,
8.800000e+01, 2.550000e+00, 2.270000e+00, 2.600000e-01,
1.220000e+00, 2.000000e+00, 9.000000e-01, 2.780000e+00,
3.250000e+02],
[1.141000e+01, 7.400000e-01, 2.500000e+00, 2.100000e+01,
8.800000e+01, 2.480000e+00, 2.010000e+00, 4.200000e-01,
1.440000e+00, 3.080000e+00, 1.100000e+00, 2.310000e+00,
4.340000e+02],
[1.208000e+01, 1.130000e+00, 2.510000e+00, 2.400000e+01,
7.800000e+01, 2.000000e+00, 1.580000e+00, 4.000000e-01,
1.400000e+00, 2.200000e+00, 1.310000e+00, 2.720000e+00,
6.300000e+02],
[1.386000e+01, 1.350000e+00, 2.270000e+00, 1.600000e+01,
9.800000e+01, 2.980000e+00, 3.150000e+00, 2.200000e-01,
1.850000e+00, 7.220000e+00, 1.010000e+00, 3.550000e+00,
1.045000e+03],
[1.208000e+01, 1.390000e+00, 2.500000e+00, 2.250000e+01,
8.400000e+01, 2.560000e+00, 2.290000e+00, 4.300000e-01,
1.040000e+00, 2.900000e+00, 9.300000e-01, 3.190000e+00,
3.850000e+02],
[1.419000e+01, 1.590000e+00, 2.480000e+00, 1.650000e+01,
1.080000e+02, 3.300000e+00, 3.930000e+00, 3.200000e-01,
1.860000e+00, 8.700000e+00, 1.230000e+00, 2.820000e+00,
1.680000e+03],
[1.311000e+01, 1.010000e+00, 1.700000e+00, 1.500000e+01,
7.800000e+01, 2.980000e+00, 3.180000e+00, 2.600000e-01,
2.280000e+00, 5.300000e+00, 1.120000e+00, 3.180000e+00,
5.020000e+02],
[1.233000e+01, 1.100000e+00, 2.280000e+00, 1.600000e+01,
1.010000e+02, 2.050000e+00, 1.090000e+00, 6.300000e-01,
4.100000e-01, 3.270000e+00, 1.250000e+00, 1.670000e+00,
6.800000e+02],
[1.340000e+01, 4.600000e+00, 2.860000e+00, 2.500000e+01,
1.120000e+02, 1.980000e+00, 9.600000e-01, 2.700000e-01,
1.110000e+00, 8.500000e+00, 6.700000e-01, 1.920000e+00,
6.300000e+02],
[1.277000e+01, 2.390000e+00, 2.280000e+00, 1.950000e+01,
8.600000e+01, 1.390000e+00, 5.100000e-01, 4.800000e-01,
6.400000e-01, 9.899999e+00, 5.700000e-01, 1.630000e+00,
4.700000e+02],
[1.378000e+01, 2.760000e+00, 2.300000e+00, 2.200000e+01,
9.000000e+01, 1.350000e+00, 6.800000e-01, 4.100000e-01,
1.030000e+00, 9.580000e+00, 7.000000e-01, 1.680000e+00,
6.150000e+02],
[1.242000e+01, 1.610000e+00, 2.190000e+00, 2.250000e+01,
1.080000e+02, 2.000000e+00, 2.090000e+00, 3.400000e-01,
1.610000e+00, 2.060000e+00, 1.060000e+00, 2.960000e+00,
3.450000e+02],
[1.237000e+01, 1.210000e+00, 2.560000e+00, 1.810000e+01,
9.800000e+01, 2.420000e+00, 2.650000e+00, 3.700000e-01,
2.080000e+00, 4.600000e+00, 1.190000e+00, 2.300000e+00,
6.780000e+02],
[1.208000e+01, 1.830000e+00, 2.320000e+00, 1.850000e+01,
8.100000e+01, 1.600000e+00, 1.500000e+00, 5.200000e-01,
1.640000e+00, 2.400000e+00, 1.080000e+00, 2.270000e+00,
4.800000e+02],
[1.356000e+01, 1.730000e+00, 2.460000e+00, 2.050000e+01,
1.160000e+02, 2.960000e+00, 2.780000e+00, 2.000000e-01,
2.450000e+00, 6.250000e+00, 9.800000e-01, 3.030000e+00,
1.120000e+03],
[1.402000e+01, 1.680000e+00, 2.210000e+00, 1.600000e+01,
9.600000e+01, 2.650000e+00, 2.330000e+00, 2.600000e-01,
1.980000e+00, 4.700000e+00, 1.040000e+00, 3.590000e+00,
1.035000e+03],
[1.237000e+01, 1.630000e+00, 2.300000e+00, 2.450000e+01,
8.800000e+01, 2.220000e+00, 2.450000e+00, 4.000000e-01,
1.900000e+00, 2.120000e+00, 8.900000e-01, 2.780000e+00,
3.420000e+02],
[1.316000e+01, 3.570000e+00, 2.150000e+00, 2.100000e+01,
1.020000e+02, 1.500000e+00, 5.500000e-01, 4.300000e-01,
1.300000e+00, 4.000000e+00, 6.000000e-01, 1.680000e+00,
8.300000e+02],
[1.358000e+01, 1.660000e+00, 2.360000e+00, 1.910000e+01,
1.060000e+02, 2.860000e+00, 3.190000e+00, 2.200000e-01,
1.950000e+00, 6.900000e+00, 1.090000e+00, 2.880000e+00,
1.515000e+03],
[1.375000e+01, 1.730000e+00, 2.410000e+00, 1.600000e+01,
8.900000e+01, 2.600000e+00, 2.760000e+00, 2.900000e-01,
1.810000e+00, 5.600000e+00, 1.150000e+00, 2.900000e+00,
1.320000e+03],
[1.388000e+01, 1.890000e+00, 2.590000e+00, 1.500000e+01,
1.010000e+02, 3.250000e+00, 3.560000e+00, 1.700000e-01,
1.700000e+00, 5.430000e+00, 8.800000e-01, 3.560000e+00,
1.095000e+03]]), array([2, 2, 1, 2, 0, 1, 1, 1, 2, 0, 1, 1, 2, 0, 1, 0, 0, 2, 2, 1, 1, 0,
1, 0, 2, 1, 1, 2, 0, 0, 0, 2, 0, 0, 1, 2, 1, 0, 2, 1, 0, 2, 1, 1,
0, 1, 0, 0, 1, 0, 0, 2, 1, 1, 1, 0, 1, 1, 1, 2, 2, 0, 1, 2, 2, 1,
1, 0, 1, 2, 2, 1, 2, 1, 1, 1, 0, 0, 2, 0, 2, 0, 0, 1, 1, 0, 0, 0,
1, 0, 1, 2, 1, 1, 1, 2, 2, 1, 0, 0, 1, 2, 2, 0, 1, 2, 2, 2, 2, 1,
0, 1, 0, 2, 0, 0, 1, 0, 0, 2, 1, 0, 2, 2, 0, 0, 2, 2, 2, 1, 1, 1,
1, 1, 1, 2, 0, 1, 1, 0, 1, 1]), array([0, 0, 2, 0, 1, 0, 1, 2, 1, 2, 0, 2, 0, 1, 0, 1, 1, 1, 0, 1, 0, 1,
1, 2, 2, 2, 1, 1, 1, 0, 0, 1, 2, 0, 0, 0])]
<class 'list'>
4
In [74]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
print(X_train)
[[1.434e+01 1.680e+00 2.700e+00 ... 5.700e-01 1.960e+00 6.600e+02]
[1.253e+01 5.510e+00 2.640e+00 ... 8.200e-01 1.690e+00 5.150e+02]
[1.237e+01 1.070e+00 2.100e+00 ... 1.040e+00 2.770e+00 6.600e+02]
...
[1.438e+01 1.870e+00 2.380e+00 ... 1.200e+00 3.000e+00 1.547e+03]
[1.269e+01 1.530e+00 2.260e+00 ... 9.600e-01 2.060e+00 4.950e+02]
[1.234e+01 2.450e+00 2.460e+00 ... 8.000e-01 3.380e+00 4.380e+02]]
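A minimal sketch of the step this split enables, reusing the RandomForestClassifier from above: fit on the training portion only, then score on the held-out test set (the exact score will vary with the seed):

model = RandomForestClassifier(random_state=42)  # fixed seed for reproducibility
model.fit(X_train, y_train)                      # train on the 80% split only
y_test_pred = model.predict(X_test)              # predict on unseen samples
print("test accuracy =", accuracy_score(y_test, y_test_pred))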