add pytorch regressor example

This commit is contained in:
Yinon Polak 2023-03-20 17:06:33 +02:00
parent 601c37f862
commit 54db239175
5 changed files with 137 additions and 14 deletions

View File

@ -1,6 +1,6 @@
import logging
from pathlib import Path
from typing import Any, Dict, Optional
from typing import Any, Dict, Optional, Type
import pandas as pd
import torch
@ -20,6 +20,7 @@ class PyTorchModelTrainer:
criterion: nn.Module,
device: str,
init_model: Dict,
target_tensor_type: torch.dtype,
model_meta_data: Dict[str, Any] = {},
**kwargs
):
@ -30,6 +31,8 @@ class PyTorchModelTrainer:
:param device: The device to use for training (e.g. 'cpu', 'cuda').
:param init_model: A dictionary containing the initial model/optimizer
state_dict and model_meta_data saved by self.save() method.
:param target_tensor_type: type of target tensor, for classification usually
torch.long, for regressor usually torch.float.
:param model_meta_data: Additional metadata about the model (optional).
:param max_iters: The number of training iterations to run.
iteration here refers to the number of times we call
@ -42,6 +45,7 @@ class PyTorchModelTrainer:
self.criterion = criterion
self.model_meta_data = model_meta_data
self.device = device
self.target_tensor_type = target_tensor_type
self.max_iters: int = kwargs.get("max_iters", 100)
self.batch_size: int = kwargs.get("batch_size", 64)
self.max_n_eval_batches: Optional[int] = kwargs.get("max_n_eval_batches", None)
@ -123,8 +127,8 @@ class PyTorchModelTrainer:
labels_view = labels_shape[0] if labels_shape[1] == 1 else labels_shape
dataset = TensorDataset(
torch.from_numpy(data_dictionary[f"{split}_features"].values).float(),
torch.from_numpy(data_dictionary[f"{split}_labels"].astype(float).values)
.long()
torch.from_numpy(data_dictionary[f"{split}_labels"].values)
.to(self.target_tensor_type)
.view(labels_view)
)

View File

@ -22,16 +22,6 @@ class PyTorchClassifier(BasePyTorchModel):
User must implement fit method
"""
def __init__(self, **kwargs):
"""
int: The number of nodes in the hidden layer of the neural network.
int: The maximum number of iterations to run during training.
int: The batch size to use during training.
float: The learning rate to use during training.
int: The number of training iterations between each evaluation.
dict: A dictionary mapping class names to their corresponding indices.
dict: A dictionary mapping indices to their corresponding class names.
"""
super().__init__(**kwargs)
self.class_name_to_index = None
self.index_to_class_name = None

View File

@ -11,7 +11,7 @@ from freqtrade.freqai.prediction_models.PyTorchMLPModel import PyTorchMLPModel
class PyTorchMLPClassifier(PyTorchClassifier):
"""
This class implements the fit method of IFreqaiModel.
int the fit method we initialize the model and trainer objects.
in the fit method we initialize the model and trainer objects.
the only requirement from the model is to be aligned to PyTorchClassifier
predict method that expects the model to predict tensor of type long.
the trainer defines the training loop.
@ -75,6 +75,7 @@ class PyTorchMLPClassifier(PyTorchClassifier):
model_meta_data={"class_names": class_names},
device=self.device,
init_model=init_model,
target_tensor_type=torch.long,
**self.trainer_kwargs,
)
trainer.fit(data_dictionary)

View File

@ -0,0 +1,78 @@
from typing import Any, Dict
import torch
from freqtrade.freqai.base_models.PyTorchModelTrainer import PyTorchModelTrainer
from freqtrade.freqai.data_kitchen import FreqaiDataKitchen
from freqtrade.freqai.prediction_models.PyTorchMLPModel import PyTorchMLPModel
from freqtrade.freqai.prediction_models.PyTorchRegressor import PyTorchRegressor
class PyTorchMLPRegressor(PyTorchRegressor):
    """
    This class implements the fit method of IFreqaiModel.
    In the fit method we initialize the model and trainer objects.
    The only requirement from the model is to be aligned to PyTorchRegressor
    predict method that expects the model to predict tensor of type float.
    The trainer defines the training loop.

    Parameters are passed via `model_training_parameters` under the freqai
    section in the config file. e.g:
    {
        ...
        "freqai": {
            ...
            "model_training_parameters" : {
                "learning_rate": 3e-4,
                "trainer_kwargs": {
                    "max_iters": 5000,
                    "batch_size": 64,
                    "max_n_eval_batches": None,
                },
                "model_kwargs": {
                    "hidden_dim": 512,
                    "dropout_percent": 0.2,
                    "n_layer": 1,
                },
            }
        }
    }
    """

    def __init__(self, **kwargs):
        """
        Read the training hyper-parameters from the freqai configuration.

        :param kwargs: forwarded unchanged to the parent class constructor.
        """
        super().__init__(**kwargs)
        model_training_params = self.freqai_info.get("model_training_parameters", {})
        # Every entry is optional; missing keys fall back to the defaults below.
        self.learning_rate: float = model_training_params.get("learning_rate", 3e-4)
        # Extra keyword arguments forwarded to PyTorchMLPModel in fit().
        self.model_kwargs: Dict[str, Any] = model_training_params.get("model_kwargs", {})
        # Extra keyword arguments forwarded to PyTorchModelTrainer in fit().
        self.trainer_kwargs: Dict[str, Any] = model_training_params.get("trainer_kwargs", {})

    def fit(self, data_dictionary: Dict, dk: FreqaiDataKitchen, **kwargs) -> Any:
        """
        User sets up the training and test data to fit their desired model here
        :param data_dictionary: the dictionary constructed by DataHandler to hold
            all the training and test data/labels.
        :param dk: data kitchen of the current pair; only `dk.pair` is read here,
            to look up a previously saved model state.
        :return: the PyTorchModelTrainer instance after `fit` has been called on it.
        """
        # The last axis of the training features is the model's input width.
        n_features = data_dictionary["train_features"].shape[-1]
        model = PyTorchMLPModel(
            input_dim=n_features,
            output_dim=1,  # a single regression output
            **self.model_kwargs
        )
        model.to(self.device)
        optimizer = torch.optim.AdamW(model.parameters(), lr=self.learning_rate)
        criterion = torch.nn.MSELoss()
        init_model = self.get_init_model(dk.pair)
        trainer = PyTorchModelTrainer(
            model=model,
            optimizer=optimizer,
            criterion=criterion,
            device=self.device,
            init_model=init_model,
            # Regression targets are converted to float tensors by the trainer.
            target_tensor_type=torch.float,
            **self.trainer_kwargs,
        )
        trainer.fit(data_dictionary)
        return trainer

View File

@ -0,0 +1,50 @@
import logging
from typing import Tuple
import numpy as np
import numpy.typing as npt
import torch
from pandas import DataFrame
from freqtrade.freqai.base_models.BasePyTorchModel import BasePyTorchModel
from freqtrade.freqai.data_kitchen import FreqaiDataKitchen
logger = logging.getLogger(__name__)
class PyTorchRegressor(BasePyTorchModel):
    """
    A PyTorch implementation of a regressor.
    User must implement fit method
    """

    def __init__(self, **kwargs):
        """Forward all keyword arguments to the parent class constructor."""
        super().__init__(**kwargs)

    def predict(
        self, unfiltered_df: DataFrame, dk: FreqaiDataKitchen, **kwargs
    ) -> Tuple[DataFrame, npt.NDArray[np.int_]]:
        """
        Filter the prediction features data and predict with it.
        :param unfiltered_df: Full dataframe for the current backtest period.
        :param dk: data kitchen used to select, normalize and store the
            prediction features.
        :return:
        :pred_df: dataframe containing the predictions
        :do_predict: np.array of 1s and 0s to indicate places where freqai needed to remove
            data (NaNs) or felt uncertain about data (PCA and DI index)
        """
        dk.find_features(unfiltered_df)
        filtered_df, _ = dk.filter_features(
            unfiltered_df, dk.training_features_list, training_filter=False
        )
        filtered_df = dk.normalize_data_from_metadata(filtered_df)
        dk.data_dictionary["prediction_features"] = filtered_df

        self.data_cleaning_predict(dk)
        # Re-read the features from the dictionary instead of reusing
        # `filtered_df`: data_cleaning_predict receives `dk` and may replace them.
        x = torch.from_numpy(dk.data_dictionary["prediction_features"].values)\
            .float()\
            .to(self.device)

        # Inference only: skip building the autograd graph.
        with torch.no_grad():
            y = self.model.model(x)

        # A tensor living on an accelerator (e.g. 'cuda') cannot be converted to
        # numpy directly; bring it back to host memory first. On CPU this is a no-op.
        pred_df = DataFrame(y.detach().cpu().numpy(), columns=[dk.label_list[0]])
        return (pred_df, dk.do_predict)