-

   rss_rss_hh_new

 - e-mail

 

 -

 LiveInternet.ru:
: 17.03.2011
:
:
: 51

:


[ ] ,

, 12 2017 . 13:21 +
cointegrated 13:21

,

, . , .


. - , .


- . "", . "" . , , .


?


, .


:


  • , ;
  • , - (, ), ;
  • , .. (, , ).

, .. , "" (X,Y), X Y. X , (, ). Y , (, ). - (, , 15%).


, , , , , (MAE) (accuracy). , ( , , ). , , . , 98% , 98% "", "" .


, : . 3000 , 50 , 5000 , 10 , . , , ( ). , , .



: , , , , : , , , , . ?


1.


- , ( , , ). : , , , , . , - , , - . , 7.6% 8%, , . , , .


, . machine learning , " ", . , - , , . , . , : . , , ? , , , .


- : . : " , , , ". , , . , 5% . , 5%: , "" . "" , . , , . , : .


- . , , . . , , .


2.


, , . X , Y. , , X . feature engineering, . : .


, . , , . , . , , , , .


, , , . "" ( ). , - . , 50% ( ), "" 50% . , - . . , : ?


3.


, , . , , . , " ". , , . , .


, , . , , . ( ) (, , ), , -. . , : , , , . .


, - . , " " - . , , . , . , , " " . , , , .


4.


" ", - . , . , - . , ?


, . . , java, python, , . , C++, . SQL, , , json-.


( , python), , . , , PMML, . , . , . , , . , . log() , !


, . : , . , n*m, 1*m . . , : !


, . , data scientist'a . , ( ) , , , . . , . , .


5.


. , , , . , , . - , , . , . , , . , , , . , !


- , , , , . , , . , , . - . , ( + ) (-, ). : , . , ( , ), , (, 10% ). A/B . , .


6.


, , . , , . 17 , 14 23 , . -, A/B , , . , , , . " ! , ! !". , , ( ) . , , -, . .


: 2007 . , . , , , , . .


, , , , .. . , , . : , - . , X, (, ).


image


python
# coding: utf-8
# Demo script: fit three regressors on a narrow range of x (5..15) and plot
# their predictions over the whole 0..20 range, next to points that fall
# outside the training range, to compare how each model extrapolates.
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import rc
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.neural_network import MLPRegressor
from sklearn.linear_model import LinearRegression

# NOTE(review): Verdana was presumably selected so non-ASCII labels render;
# kept so the figure looks the same where that font is installed.
rc('font', family='Verdana')

# Synthetic data: y = x + 1 + standard normal noise. The seed is fixed and
# the order of the two random draws is significant for reproducibility.
np.random.seed(2)
n = 15
x_all = np.random.randint(20, size=(n, 1))
y_all = x_all.ravel() + 10 * 0.1 + np.random.normal(size=n)

# Points with 5 <= x <= 15 form the training set; everything else is "new".
fltr = (x_all <= 15) & (x_all >= 5)
in_range = fltr.ravel()  # 1-D boolean mask, computed once
x_train, y_train = x_all[in_range, :], y_all[in_range]
x_new, y_new = x_all[~in_range, :], y_all[~in_range]

# Evaluation grid covering both the training range and the area outside it.
x_plot = np.linspace(0, 20)

# Fit the three models on the training subset only.
m1 = GradientBoostingRegressor(
    n_estimators=10,
    max_depth=3,
    random_state=42,
).fit(x_train, y_train)
m2 = MLPRegressor(
    hidden_layer_sizes=(10,),  # explicit one-element tuple: one hidden layer
    activation='logistic',
    random_state=42,
    # NOTE(review): per the scikit-learn docs this is only used by the
    # 'sgd'/'adam' solvers; kept as in the original call.
    learning_rate_init=1e-1,
    solver='lbfgs',
    alpha=0.1,
).fit(x_train, y_train)
m3 = LinearRegression().fit(x_train, y_train)

# Predictions do not depend on the subplot, so compute them once. Each curve
# carries its own label so the legend cannot get out of sync with the lines.
x_grid = x_plot[:, np.newaxis]
curves = [
    (m1.predict(x_grid), 'red', 'Gradient boosting'),
    (m2.predict(x_grid), 'green', 'Neural network (MLP)'),
    (m3.predict(x_grid), 'orange', 'Linear regression'),
]

# Left panel: training points and model curves.
# Right panel: the same, plus the held-out points outside the training range.
plt.figure(figsize=(12, 4))
title = {
    1: 'Models fitted on the training range',
    2: 'Same models, with points outside the training range',
}
for i in [1, 2]:
    plt.subplot(1, 2, i)
    plt.scatter(x_train.ravel(), y_train, lw=0, s=40)
    plt.xlim([0, 20])
    plt.ylim([0, 25])
    for y_hat, color, label in curves:
        plt.plot(x_plot, y_hat, color=color, label=label)
    plt.xlabel('x')
    plt.ylabel('y')
    plt.title(title[i])
    if i == 1:
        # Only labelled artists (the three curves) appear in the legend.
        plt.legend(loc='upper left')
    if i == 2:
        plt.scatter(x_new.ravel(), y_new, lw=0, s=40, color='black')
plt.show()

( ) . , ? - ( ), , , - TimeSeriesSplit sklearn. t, , t. , , .


, , , . , . : n . , (, ). .


, , . , , , . , . , .


7.


, , , . , , : , 40% . , , , . , , . , . , , 200 , 100 , . . , , !


, , : 1% . 1% , , . , . 40% 90%, .


, , 40%, . , 0-20%, , 20-40% . - , , , . , ML- , . , 0.1% , , - . .


8.


, , , - -, . , , , , . , . , , . " ", . , , , . : - .


, , . . , , , , , , . , .


, . , , , , . ( ). , , , . ----- . : , . , .



, . , : , , . , :


  1. ,
  2. , ,
    , - .

ROC-AUC -!

Original source: habrahabr.ru (comments, light).

https://habrahabr.ru/post/337722/

:  

: [1] []
 

:
: 

: ( )

:

  URL