Training a machine learning model is the easy part: the libraries are free, the tutorials are plentiful, and a clean dataset yields a pleasant-looking metric in an afternoon. Making that model genuinely useful is the hard part. A model can be excellent "on paper" and worthless in practice, because "works" in a notebook and "works" in production are two very different claims. In this article I walk through the places where applied machine learning projects typically break, from the problem statement to the model's life after deployment.

Let's start with the basic setup.
Most applied problems are supervised learning: the training data is a set of pairs (X, Y), where X describes the features of an object and Y is the answer we want to predict. X might be, say, a client's age and income; Y might be whether the client repaid a loan. The model's job is to produce, for a new X, an estimate of Y (for example, a default probability of 15%).
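To make the (X, Y) setup concrete, here is a minimal sketch; the feature names, the numbers, and the choice of LogisticRegression are made up purely for illustration:

import numpy as np
from sklearn.linear_model import LogisticRegression

# each row of X describes one client; y holds the answers to learn
X = np.array([[25, 30],   # age, income in thousands (hypothetical)
              [40, 80],
              [33, 52],
              [58, 41]])
y = np.array([0, 1, 1, 0])  # 1 = repaid the loan, 0 = did not

model = LogisticRegression().fit(X, y)
# estimated probability of repayment for a new client
print(model.predict_proba([[30, 45]])[:, 1])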
To compare models you need a quality metric, and the obvious candidates are the textbook ones: mean absolute error (MAE) for regression, accuracy for classification. Both average the model's performance over the test objects, and both can lie. Accuracy is the classic trap: if 98% of the objects belong to class "0", the degenerate model that always answers "0" scores 98% accuracy while being perfectly useless.
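The trap is easy to reproduce in a couple of lines (a synthetic illustration of the 98% example above):

import numpy as np

y_true = np.array([0] * 98 + [1] * 2)   # 98% of objects are class 0
y_pred = np.zeros_like(y_true)          # the "model" never predicts class 1
print((y_true == y_pred).mean())        # 0.98, yet the model is useless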
Averaging hides a lot. Suppose on four clients the model errs by 3000, 50, 5000 and 10: MAE reports about 2000, but two huge errors dominate the total (and a squared-error metric would amplify them further). Whether those large errors are what actually hurts is a question about the business, not about the mathematics, so decide which mistakes are expensive before you pick the number to optimize.
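Here is the arithmetic with those four errors, comparing MAE against a squared-error metric (RMSE) on the same numbers:

import numpy as np

errors = np.array([3000, 50, 5000, 10])
mae = errors.mean()                    # 2015.0: all errors weighted equally
rmse = np.sqrt((errors ** 2).mean())   # ~2915: the big errors dominate
print(mae, rmse)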
And there are plenty of numbers to pick from. For regression: MAE, RMSE, MAPE, R², quantile losses; for classification: accuracy, precision, recall, F1, ROC-AUC. Which one should you optimize?
The only honest answer comes from outside the model: from what the business loses on each kind of mistake (money, customers, time). Translate the metric into business terms: estimate what a unit of improvement is worth and what a unit of degradation costs. Sometimes a tiny shift matters enormously, sometimes a large one matters not at all. If the error dropped from 8% to 7.6%, that may be a rounding artifact or a fortune, and the metric alone cannot tell you which. Until you have done that translation, you do not know whether your model is any good.
This is worth stressing because the problem statement is where projects die first. A customer hears about machine learning, wants "some of that", and formulates no task at all. Before training anything, ask what decision will be made from the prediction, who will make it, and what changes if the prediction is wrong. If nobody will act on the model's output, the model is useless no matter what its metrics say. And if somebody will, then the required quality, latency and output format follow from that decision, not from a leaderboard.
Another classic failure is data leakage: information about the answer sneaking into the features. The rule sounds trivial: "use only what will be known at the moment of prediction, in the form it will be known then". In practice leaks are everywhere. You predict churn and get a miraculous 5% error; then it turns out the 5% rests on a field that is filled in only after the client has already left. The "leak" makes the offline metric fantastic and the production model helpless. Go through every feature and ask: will this value exist, with the same meaning, at the moment the model is actually called?
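A synthetic sketch of how a leak looks from the inside (the data and the leaky column are invented for the demo): the honest features carry no signal at all, but one column derived from the target makes cross-validation look perfect.

import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_val_score

rng = np.random.RandomState(0)
X = rng.normal(size=(1000, 5))      # honest features: pure noise
y = rng.randint(2, size=1000)       # target unrelated to X
print(cross_val_score(LogisticRegression(max_iter=1000), X, y).mean())  # ~0.5

# add a "feature" secretly computed from the target itself
X_leak = np.hstack([X, y[:, None] + rng.normal(scale=0.1, size=(1000, 1))])
print(cross_val_score(LogisticRegression(max_iter=1000), X_leak, y).mean())  # ~1.0: too good to be true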
Leaks also hide in how you split the data. If the same client, or the same day, ends up in both the training set and the test set, the test stops being a test. Split along the boundary that production will actually face: new clients, future dates.
Meanwhile, the model itself usually matters less than what you feed it. If X carries information about Y, even a simple model will find it; if it does not, no architecture will conjure it up. Turning raw data into informative X is called feature engineering, and it is where most of the real work lives. The rule of thumb: better features beat a better algorithm.
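A toy sketch of the point (the data and the ratio feature are invented): here the target depends on the ratio of two raw columns, and adding that single engineered column helps more than any model swap would.

import numpy as np
from sklearn.linear_model import LinearRegression

rng = np.random.RandomState(0)
raw = rng.uniform(1, 10, size=(1000, 2))             # e.g. debt and income
y = raw[:, 0] / raw[:, 1] + rng.normal(scale=0.05, size=1000)

print(LinearRegression().fit(raw, y).score(raw, y))  # mediocre R^2 on raw columns

# add the ratio as an explicit feature
engineered = np.hstack([raw, (raw[:, 0] / raw[:, 1])[:, None]])
print(LinearRegression().fit(engineered, y).score(engineered, y))  # close to 1.0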
It follows that you should start simple. A baseline model trains in seconds, is easy to debug, and gives you a reference point; anything heavier has to justify its complexity by beating the baseline, and by more than the noise in your measurement. Surprisingly often, it does not.
Pay attention to what your model's numbers actually mean, too. Many classifiers output a "probability" that is really just a score (it ranks objects, nothing more). That is fine if ranking is all you need, but dangerous if someone downstream treats the number as a probability. Calibration is the property that when the model says 50% (across many objects), the event indeed happens in about 50% of the cases. Out of the box, many models are over- or under-confident; calibration can be checked and repaired. So before reporting probabilities, ask yourself: does my 50% really mean 50%?
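Calibration is easy to check. A sketch using sklearn's calibration_curve on synthetic scores (here the outcomes are generated to match the scores, so the curve comes out well calibrated; on a real model the two rows may diverge):

import numpy as np
from sklearn.calibration import calibration_curve

rng = np.random.RandomState(0)
proba = rng.uniform(size=10000)                           # model scores
outcome = (rng.uniform(size=10000) < proba).astype(int)   # outcomes matching the scores
frac_pos, mean_pred = calibration_curve(outcome, proba, n_bins=5)
print(np.round(mean_pred, 2))   # average predicted probability per bin
print(np.round(frac_pos, 2))    # observed share of positives per bin: the rows agree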
Interpretability is another requirement that surfaces late. People distrust a model they cannot understand and quietly ignore its output, dismissing it as a "black box". Sometimes a slightly weaker but explainable model is worth more, simply because it will actually be used. And when the model misbehaves in production, interpretability is what lets you find out why.
Requirements, in general, go beyond the quality metric. Besides accuracy there are resource constraints (memory, model size) and operational ones (latency, throughput, availability), and they can rule out whole families of models before you start. Find out the constraints early. Otherwise you will discover, after months of tuning, that your winning ensemble does not fit the latency budget or the machine it must run on, and the real choice was between two small models all along.
Now suppose the model is trained and the offline metrics look good. This is where the second life of the project begins, because "good offline" and "good in production" are different claims. Everything so far was measured on historical data, in a laboratory setting where you control the inputs. In production the model meets data nobody cleaned, users nobody warned, and systems nobody synchronized. Most of the unpleasant surprises live there.
" ", - . , . , - . , ?
First, the environment. Models are almost always trained in python, but production has its own stack: the backend may be in java, the low-latency core in C++, and sometimes the only place you are allowed to run anything is SQL inside the database, with inputs arriving as json from other services. Someone has to carry the model across that gap.
There are two standard routes (when you cannot simply run python in production): reimplement the model in the production language, or export it to an exchange format such as PMML and execute it there. Both routes are error-prone in the same way: you now have two implementations that are supposed to agree, and verifying that they agree, on all inputs, to the last digit, is tedious and necessary. Even the innocent parts diverge: the same log() can return slightly different answers in different libraries and precisions, and those tiny discrepancies propagate!
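The log() point is visible without any model at all: the same value in different precisions already disagrees in the trailing digits, which is exactly the kind of discrepancy two independent implementations accumulate.

import math
import numpy as np

x = 1.2345
print(math.log(x))                    # double precision
print(float(np.log(np.float32(x))))   # single precision: differs after ~7 digits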
Second, performance. In research you score the whole dataset at once, an n*m matrix in one vectorized call; in production requests arrive one at a time, as 1*m rows. Code that happily chews through a million rows in a batch can be hopelessly slow when called row by row, because all the per-call overhead now repeats for every object. Measure the per-request latency before promising anything: the numbers will surprise you!
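A quick way to feel the difference (a sketch; the absolute timings depend on the machine, but the per-call overhead is the point):

import time
import numpy as np
from sklearn.linear_model import LinearRegression

rng = np.random.RandomState(0)
X = rng.normal(size=(100_000, 20))
model = LinearRegression().fit(X, rng.normal(size=100_000))

t0 = time.perf_counter()
model.predict(X)                        # one n*m call for all rows
t1 = time.perf_counter()
for row in X[:1000]:
    model.predict(row.reshape(1, -1))   # a thousand 1*m calls
t2 = time.perf_counter()
print(f'batch: {t1 - t0:.4f}s for 100k rows, row-by-row: {t2 - t1:.4f}s for 1k rows')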
Third, the model is only the tip of the pipeline. Everything the data scientist did to prepare the features (the joins, the aggregations, the cleaning, the encodings) must be reproduced in production, on live data, under the same latency constraints, and kept in sync with the research code forever after. In practice the preprocessing causes more incidents than the model itself. Budget for it accordingly.
Before the launch, verify the whole chain end to end: feed the same inputs to the research pipeline and the production one and compare the predictions. Any mismatch is a bug, even if today it sits in the eighth decimal place, because you do not know which discrepancies will grow tomorrow. And keep the production inputs and outputs logged: when something does go wrong, those logs are the only way to reproduce it. Trust, but verify!
Even a correctly deployed model still has to prove that it helps. Offline metrics are computed on historical data; online metrics (revenue, conversion, retention) are measured on live users, and the first does not guarantee the second. The standard instrument is an A/B test: split the traffic randomly, give one group the model's decisions and keep the other as a control. Start cautiously (say, 10% of traffic on the model), compare the business metrics between the groups, and only then roll out further. An A/B test is the only honest way to find out whether the model earns its keep; everything else is extrapolation.
And remember that online metrics are random variables too. If the test group shows a 17% conversion, the truth may be anywhere between 14 and 23 until enough users have passed through. Resist the urge to peek at the dashboard on day one and shout "It works! The metric is up! Roll it out to everyone!": early differences are mostly noise, and if you watch many metrics at once, some will look significant by pure chance. Decide on the sample size and the success criterion before the test, then wait it out.
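The width of that uncertainty takes one line to estimate. A normal-approximation confidence interval for a conversion rate, with hypothetical numbers chosen to echo the example above:

import math

conversions, users = 102, 600     # hypothetical: observed 17% conversion
p = conversions / users
half = 1.96 * math.sqrt(p * (1 - p) / users)   # 95% interval half-width
print(f'{p:.0%} +- {half:.1%}')   # roughly 17% +- 3%: 14% and 20% are both plausible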
A cautionary tale about the other great enemy, time: the credit models of 2007. They were trained on years of pre-crisis data and worked beautifully, right up until the world they described stopped existing. The crisis data looked nothing like the training data, and the models kept confidently scoring a reality they had never seen.
The general problem is that models learn the training distribution and get trusted beyond it. Within the range they have seen, most models interpolate reasonably; outside it, they extrapolate however their architecture happens to extrapolate, and different families behave completely differently. Here is a toy illustration: three models are trained on the same points and then asked about X far from anything they saw (for example, far beyond the training range).
# coding: utf-8
# toy demo: how different model families extrapolate outside the training range
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import rc
rc('font', family='Verdana')
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.neural_network import MLPRegressor
from sklearn.linear_model import LinearRegression
# generate synthetic data: y is roughly linear in x, plus noise
np.random.seed(2)
n = 15
x_all = np.random.randint(20, size=(n, 1))
y_all = x_all.ravel() + 10 * 0.1 + np.random.normal(size=n)
# train only on 5 <= x <= 15; points outside that range are held out as "new"
fltr = ((x_all <= 15) & (x_all >= 5))
x_train = x_all[fltr.ravel(), :]
y_train = y_all[fltr.ravel()]
x_new = x_all[~fltr.ravel(), :]
y_new = y_all[~fltr.ravel()]
x_plot = np.linspace(0, 20)
# fit three models of different families on the same training points
m1 = GradientBoostingRegressor(
    n_estimators=10,
    max_depth=3,
    random_state=42
).fit(x_train, y_train)
m2 = MLPRegressor(
    hidden_layer_sizes=(10,),
    activation='logistic',
    random_state=42,
    learning_rate_init=1e-1,
    solver='lbfgs',
    alpha=0.1
).fit(x_train, y_train)
m3 = LinearRegression().fit(x_train, y_train)
# plot all three fits over the full range: the left panel shows only the
# training sample, the right panel adds the held-out points
plt.figure(figsize=(12, 4))
title = {1: 'training sample only',
         2: 'same fits, with new points added'}
for i in [1, 2]:
    plt.subplot(1, 2, i)
    plt.scatter(x_train.ravel(), y_train, lw=0, s=40)
    plt.xlim([0, 20])
    plt.ylim([0, 25])
    plt.plot(x_plot, m1.predict(x_plot[:, np.newaxis]), color='red')
    plt.plot(x_plot, m2.predict(x_plot[:, np.newaxis]), color='green')
    plt.plot(x_plot, m3.predict(x_plot[:, np.newaxis]), color='orange')
    plt.xlabel('x')
    plt.ylabel('y')
    plt.title(title[i])
    if i == 1:
        plt.legend(['gradient boosting', 'neural network', 'linear regression'],
                   loc='upper left')
    if i == 2:
        plt.scatter(x_new.ravel(), y_new, lw=0, s=40, color='black')
plt.show()
On the left panel (training points only) all three curves look equally adequate; on the right, where the black points lie outside the training range, the boosting goes flat (trees cannot output values they have never seen), the network saturates, and only the linear model continues the trend. How do you catch this before production rather than after? If the data has a time dimension, validate out of time, for example with TimeSeriesSplit from sklearn: train on everything up to moment t, test on what happens after t. If the quality holds, the model tolerates the drift it will actually meet; if it collapses, better to learn that on historical data than on live users.
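A minimal sketch of out-of-time validation with TimeSeriesSplit (the data here is just ordered indices, to show which folds train on which):

import numpy as np
from sklearn.model_selection import TimeSeriesSplit

X = np.arange(12).reshape(-1, 1)   # observations ordered by time
for train_idx, test_idx in TimeSeriesSplit(n_splits=3).split(X):
    print('train:', train_idx, '-> test:', test_idx)
# each fold trains strictly on the past and tests on the future:
#   train: [0 1 2]             -> test: [3 4 5]
#   train: [0 1 2 3 4 5]       -> test: [6 7 8]
#   train: [0 1 2 3 4 5 6 7 8] -> test: [9 10 11]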
Validation, however, only covers the drift that is already in your history. After deployment the world keeps changing, and it will not send a notification. A model is not a feature you ship and forget; it is a process that degrades by default. Plan for its maintenance from day one.
So monitor the inputs, not just the model. Data sources break silently: a field that was always filled is suddenly 40% empty, an upstream system changes units or encodings, a join starts dropping rows. The model will not raise an exception; it will keep producing confident numbers from garbage. If a feature averaged 200 yesterday and averages 100 today, you want an alert, not a quarterly report. Check the basic distribution statistics of every input, every day!
Monitor the outputs too. If the model used to flag about 1% of objects and today flags far more, or the 1% it flags are suddenly very different objects, the incoming data has shifted even though no error was logged. A score distribution that migrates from hovering around 40% to hovering around 90% is a fire alarm, not a curiosity.
A simple recipe that covers a lot of this: bucket the model's scores into bands (0-20%, 20-40% and so on) and track the share of each bucket day by day; any abrupt shift in that histogram means the data, the pipeline, or the world has changed, and someone should look. This matters because an ML system fails quietly: it will not crash, it will just get, say, 0.1% worse every day until the business notices the hard way. The alarms do not exist unless you build them.
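A sketch of such bucket monitoring (the score distributions are simulated; in real life yesterday's and today's scores come from your prediction logs):

import numpy as np

bins = [0, 0.2, 0.4, 0.6, 0.8, 1.0]     # the 0-20%, 20-40%, ... buckets
rng = np.random.RandomState(0)
yesterday = rng.beta(2, 5, size=10000)  # yesterday's score distribution
today = rng.beta(3, 4, size=10000)      # today's: the data has drifted

share_then = np.histogram(yesterday, bins=bins)[0] / len(yesterday)
share_now = np.histogram(today, bins=bins)[0] / len(today)
print(np.round(share_then, 2))
print(np.round(share_now, 2))           # the shift in bucket shares is the alarm signal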
Degradation also has a boring, predictable component: the world slowly drifts away from the training snapshot. The cure is retraining on fresh data, on a schedule or by trigger. Decide in advance how retraining happens, who approves the new model, and how you roll back a bad one. "We'll retrain it when needed" is not a plan; by the time the need shows in the business metrics, you are already late. The rule of thumb: a model without a retraining process is a model with an expiry date.
None of the above is exotic. It is the ordinary, unglamorous engineering around the model: problem statements, data checks, deployment, measurement, monitoring, maintenance. It is also where the majority of the effort goes, and where the majority of failures happen.
That is the main thing I would like you to take away. A machine learning system is a chain (and a chain breaks at the weakest link): problem - data - model - deployment - monitoring. The model is one link of five, and rarely the weakest. A brilliant algorithm does not compensate for a leaked feature, a botched rollout, or an unwatched pipeline; a modest algorithm inside a healthy chain quietly makes money.
So if you want your model to work, and not merely to score well: state the problem in business terms, pick the metric that reflects those terms, hunt down the leaks, validate the way production will actually use the model, engineer the deployment, test online, and monitor everything. And to everyone setting out on this path:
May ROC-AUC be with you!