Source code for ModelSelection
"""
Time Series Tools
========================
"""
from abc import ABCMeta
from sklearn.model_selection import BaseCrossValidator
[docs]class TimeSeriesCV(BaseCrossValidator, metaclass=ABCMeta):
"""
This is a very naive cross validator for time series. It simply sorts the given index (default 0)
and splits the sorted index into a train and a test index set according to the given ratios.
:param test_ratio: (default .2) float betweem 0. and 1., the portion of test data
:param train_ratio: (default `None`-> .8) float betweem 0. and 1., the portion of train data
:param index: (default 0) the index of the column that corresponds to a time parameter in the data
"""
def __init__(self, test_ratio=.2, train_ratio=None, index=0):
self.index = index
if train_ratio is not None:
if train_ratio > 1.:
raise ValueError("the value of `rain_ratio` should be smaller than 1.")
self.train_ratio = train_ratio
self.test_ratio = 1. - train_ratio
elif test_ratio is not None:
if test_ratio > 1.:
raise ValueError("the value of `test_ratio` should be smaller than 1.")
self.train_ratio = 1. - test_ratio
self.test_ratio = test_ratio
else:
self.train_ratio = .8
self.test_ratio = .2
[docs] def get_n_splits(self, X=None, y=None, groups=None):
"""
Returns the number of splitting iterations in the cross-validator
:param X: Always ignored, exists for compatibility.
:param y: Always ignored, exists for compatibility.
:param groups: Always ignored, exists for compatibility.
:return: Returns the number of splitting iterations in the cross-validator which is 1 for time series.
"""
return 1
[docs] def split(self, X, y=None, groups=None):
"""
Generate indices to split data into training and test set.
:param X: array-like of shape (n_samples, n_features)
Training data, where n_samples is the number of samples
and n_features is the number of features.
:param y: array-like of shape (n_samples,), default=None
The target variable for supervised learning problems.
:param groups: array-like of shape (n_samples,), default=None
Group labels for the samples used while splitting the dataset into
train/test set.
:return: `train` The training set indices for that split. `test` The testing set indices for that split.
"""
from copy import copy
X_c = copy(X)
sorted_index = X_c[:, self.index].argsort()
cut = int(self.train_ratio * sorted_index.shape[0])
train_index = sorted_index[: cut]
test_index = sorted_index[cut:]
yield train_index, test_index