Merge pull request #8297 from Yinon-Polak/feat/add-pytorch-model-support
Feat/add pytorch model support
freqtrade/freqai/base_models/BasePyTorchClassifier.py (new file, 147 lines)
@@ -0,0 +1,147 @@
import logging
from typing import Dict, List, Tuple

import numpy as np
import numpy.typing as npt
import pandas as pd
import torch
from pandas import DataFrame
from torch.nn import functional as F

from freqtrade.exceptions import OperationalException
from freqtrade.freqai.base_models.BasePyTorchModel import BasePyTorchModel
from freqtrade.freqai.data_kitchen import FreqaiDataKitchen


logger = logging.getLogger(__name__)


class BasePyTorchClassifier(BasePyTorchModel):
    """
    A PyTorch implementation of a classifier.
    User must implement fit method

    Important!

    - User must declare the target class names in the strategy,
      under IStrategy.set_freqai_targets method.

    for example, in your strategy:
    ```
    def set_freqai_targets(self, dataframe: DataFrame, metadata: Dict, **kwargs):
        self.freqai.class_names = ["down", "up"]
        dataframe['&s-up_or_down'] = np.where(dataframe["close"].shift(-100) >
                                              dataframe["close"], 'up', 'down')

        return dataframe
    """
    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        self.class_name_to_index = None
        self.index_to_class_name = None

    def predict(
            self, unfiltered_df: DataFrame, dk: FreqaiDataKitchen, **kwargs
    ) -> Tuple[DataFrame, npt.NDArray[np.int_]]:
        """
        Filter the prediction features data and predict with it.
        :param unfiltered_df: Full dataframe for the current backtest period.
        :return:
        :pred_df: dataframe containing the predictions
        :do_predict: np.array of 1s and 0s to indicate places where freqai needed to remove
        data (NaNs) or felt uncertain about data (PCA and DI index)
        :raises ValueError: if 'class_names' doesn't exist in model meta_data.
        """

        class_names = self.model.model_meta_data.get("class_names", None)
        if not class_names:
            raise ValueError(
                "Missing class names. "
                "self.model.model_meta_data['class_names'] is None."
            )

        if not self.class_name_to_index:
            self.init_class_names_to_index_mapping(class_names)

        dk.find_features(unfiltered_df)
        filtered_df, _ = dk.filter_features(
            unfiltered_df, dk.training_features_list, training_filter=False
        )
        filtered_df = dk.normalize_data_from_metadata(filtered_df)
        dk.data_dictionary["prediction_features"] = filtered_df
        self.data_cleaning_predict(dk)
        x = self.data_convertor.convert_x(
            dk.data_dictionary["prediction_features"],
            device=self.device
        )
        logits = self.model.model(x)
        probs = F.softmax(logits, dim=-1)
        predicted_classes = torch.argmax(probs, dim=-1)
        predicted_classes_str = self.decode_class_names(predicted_classes)
        pred_df_prob = DataFrame(probs.detach().numpy(), columns=class_names)
        pred_df = DataFrame(predicted_classes_str, columns=[dk.label_list[0]])
        pred_df = pd.concat([pred_df, pred_df_prob], axis=1)
        return (pred_df, dk.do_predict)

    def encode_class_names(
            self,
            data_dictionary: Dict[str, pd.DataFrame],
            dk: FreqaiDataKitchen,
            class_names: List[str],
    ):
        """
        encode class name, str -> int
        assuming first column of *_labels data frame to be the target column
        containing the class names
        """

        target_column_name = dk.label_list[0]
        for split in self.splits:
            label_df = data_dictionary[f"{split}_labels"]
            self.assert_valid_class_names(label_df[target_column_name], class_names)
            label_df[target_column_name] = list(
                map(lambda x: self.class_name_to_index[x], label_df[target_column_name])
            )

    @staticmethod
    def assert_valid_class_names(
            target_column: pd.Series,
            class_names: List[str]
    ):
        non_defined_labels = set(target_column) - set(class_names)
        if len(non_defined_labels) != 0:
            raise OperationalException(
                f"Found non defined labels: {non_defined_labels}, "
                f"expecting labels: {class_names}"
            )

    def decode_class_names(self, class_ints: torch.Tensor) -> List[str]:
        """
        decode class name, int -> str
        """

        return list(map(lambda x: self.index_to_class_name[x.item()], class_ints))

    def init_class_names_to_index_mapping(self, class_names):
        self.class_name_to_index = {s: i for i, s in enumerate(class_names)}
        self.index_to_class_name = {i: s for i, s in enumerate(class_names)}
        logger.info(f"encoded class name to index: {self.class_name_to_index}")

    def convert_label_column_to_int(
            self,
            data_dictionary: Dict[str, pd.DataFrame],
            dk: FreqaiDataKitchen,
            class_names: List[str]
    ):
        self.init_class_names_to_index_mapping(class_names)
        self.encode_class_names(data_dictionary, dk, class_names)

    def get_class_names(self) -> List[str]:
        if not self.class_names:
            raise ValueError(
                "self.class_names is empty, "
                "set self.freqai.class_names = ['class a', 'class b', 'class c'] "
                "inside IStrategy.set_freqai_targets method."
            )

        return self.class_names
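A minimal sketch of the encode/decode round trip implemented above (illustrative, not part of the diff):

```python
# Sketch: the class-name <-> index mapping used by BasePyTorchClassifier.
import torch

class_names = ["down", "up"]
class_name_to_index = {s: i for i, s in enumerate(class_names)}  # {'down': 0, 'up': 1}
index_to_class_name = {i: s for i, s in enumerate(class_names)}  # {0: 'down', 1: 'up'}

encoded = [class_name_to_index[name] for name in ["up", "down", "up"]]  # [1, 0, 1]
predicted = torch.tensor(encoded)  # stand-in for torch.argmax(probs, dim=-1)
decoded = [index_to_class_name[i.item()] for i in predicted]  # ['up', 'down', 'up']
```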
freqtrade/freqai/base_models/BasePyTorchModel.py (new file, 83 lines)
@@ -0,0 +1,83 @@
import logging
from abc import ABC, abstractmethod
from time import time
from typing import Any

import torch
from pandas import DataFrame

from freqtrade.freqai.data_kitchen import FreqaiDataKitchen
from freqtrade.freqai.freqai_interface import IFreqaiModel
from freqtrade.freqai.torch.PyTorchDataConvertor import PyTorchDataConvertor


logger = logging.getLogger(__name__)


class BasePyTorchModel(IFreqaiModel, ABC):
    """
    Base class for PyTorch type models.
    User *must* inherit from this class and set fit() and predict() and
    data_convertor property.
    """

    def __init__(self, **kwargs):
        super().__init__(config=kwargs["config"])
        self.dd.model_type = "pytorch"
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        test_size = self.freqai_info.get('data_split_parameters', {}).get('test_size')
        self.splits = ["train", "test"] if test_size != 0 else ["train"]

    def train(
        self, unfiltered_df: DataFrame, pair: str, dk: FreqaiDataKitchen, **kwargs
    ) -> Any:
        """
        Filter the training data and train a model to it. Train makes heavy use of the datakitchen
        for storing, saving, loading, and analyzing the data.
        :param unfiltered_df: Full dataframe for the current training period
        :return:
        :model: Trained model which can be used to inference (self.predict)
        """

        logger.info(f"-------------------- Starting training {pair} --------------------")

        start_time = time()

        features_filtered, labels_filtered = dk.filter_features(
            unfiltered_df,
            dk.training_features_list,
            dk.label_list,
            training_filter=True,
        )

        # split data into train/test data.
        data_dictionary = dk.make_train_test_datasets(features_filtered, labels_filtered)
        if not self.freqai_info.get("fit_live_predictions", 0) or not self.live:
            dk.fit_labels()
        # normalize all data based on train_dataset only
        data_dictionary = dk.normalize_data(data_dictionary)

        # optional additional data cleaning/analysis
        self.data_cleaning_train(dk)

        logger.info(
            f"Training model on {len(dk.data_dictionary['train_features'].columns)} features"
        )
        logger.info(f"Training model on {len(data_dictionary['train_features'])} data points")

        model = self.fit(data_dictionary, dk)
        end_time = time()

        logger.info(f"-------------------- Done training {pair} "
                    f"({end_time - start_time:.2f} secs) --------------------")

        return model

    @property
    @abstractmethod
    def data_convertor(self) -> PyTorchDataConvertor:
        """
        a class responsible for converting `*_features` & `*_labels` pandas dataframes
        to pytorch tensors.
        """
        raise NotImplementedError("Abstract property")
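For orientation, a minimal subclass sketch of the contract this base class imposes (the class name is hypothetical; the real implementations are PyTorchMLPClassifier and PyTorchMLPRegressor further down in this diff):

```python
# Hypothetical minimal subclass; fit() must return the trained trainer/model
# object so freqai can persist it.
import torch

from freqtrade.freqai.base_models.BasePyTorchModel import BasePyTorchModel
from freqtrade.freqai.torch.PyTorchDataConvertor import (DefaultPyTorchDataConvertor,
                                                         PyTorchDataConvertor)


class MyPyTorchModel(BasePyTorchModel):

    @property
    def data_convertor(self) -> PyTorchDataConvertor:
        # float targets suit a regressor; a classifier would use torch.long
        return DefaultPyTorchDataConvertor(target_tensor_type=torch.float)

    def fit(self, data_dictionary, dk, **kwargs):
        ...  # build an nn.Module and optimizer, train, return the trainer
```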
freqtrade/freqai/base_models/BasePyTorchRegressor.py (new file, 49 lines)
@@ -0,0 +1,49 @@
import logging
from typing import Tuple

import numpy as np
import numpy.typing as npt
from pandas import DataFrame

from freqtrade.freqai.base_models.BasePyTorchModel import BasePyTorchModel
from freqtrade.freqai.data_kitchen import FreqaiDataKitchen


logger = logging.getLogger(__name__)


class BasePyTorchRegressor(BasePyTorchModel):
    """
    A PyTorch implementation of a regressor.
    User must implement fit method
    """
    def __init__(self, **kwargs):
        super().__init__(**kwargs)

    def predict(
            self, unfiltered_df: DataFrame, dk: FreqaiDataKitchen, **kwargs
    ) -> Tuple[DataFrame, npt.NDArray[np.int_]]:
        """
        Filter the prediction features data and predict with it.
        :param unfiltered_df: Full dataframe for the current backtest period.
        :return:
        :pred_df: dataframe containing the predictions
        :do_predict: np.array of 1s and 0s to indicate places where freqai needed to remove
        data (NaNs) or felt uncertain about data (PCA and DI index)
        """

        dk.find_features(unfiltered_df)
        filtered_df, _ = dk.filter_features(
            unfiltered_df, dk.training_features_list, training_filter=False
        )
        filtered_df = dk.normalize_data_from_metadata(filtered_df)
        dk.data_dictionary["prediction_features"] = filtered_df

        self.data_cleaning_predict(dk)
        x = self.data_convertor.convert_x(
            dk.data_dictionary["prediction_features"],
            device=self.device
        )
        y = self.model.model(x)
        pred_df = DataFrame(y.detach().numpy(), columns=[dk.label_list[0]])
        return (pred_df, dk.do_predict)
freqtrade/freqai/data_drawer.py
@@ -446,7 +446,7 @@ class FreqaiDataDrawer:
             dump(model, save_path / f"{dk.model_filename}_model.joblib")
         elif self.model_type == 'keras':
             model.save(save_path / f"{dk.model_filename}_model.h5")
-        elif 'stable_baselines' in self.model_type or 'sb3_contrib' == self.model_type:
+        elif self.model_type in ["stable_baselines3", "sb3_contrib", "pytorch"]:
             model.save(save_path / f"{dk.model_filename}_model.zip")

         if dk.svm_model is not None:
@@ -496,7 +496,7 @@ class FreqaiDataDrawer:
         dk.training_features_list = dk.data["training_features_list"]
         dk.label_list = dk.data["label_list"]

-    def load_data(self, coin: str, dk: FreqaiDataKitchen) -> Any:
+    def load_data(self, coin: str, dk: FreqaiDataKitchen) -> Any:  # noqa: C901
         """
         loads all data required to make a prediction on a sub-train time range
         :returns:
@@ -537,6 +537,11 @@ class FreqaiDataDrawer:
                 self.model_type, self.freqai_info['rl_config']['model_type'])
             MODELCLASS = getattr(mod, self.freqai_info['rl_config']['model_type'])
             model = MODELCLASS.load(dk.data_path / f"{dk.model_filename}_model")
+        elif self.model_type == 'pytorch':
+            import torch
+            zip = torch.load(dk.data_path / f"{dk.model_filename}_model.zip")
+            model = zip["pytrainer"]
+            model = model.load_from_checkpoint(zip)

         if Path(dk.data_path / f"{dk.model_filename}_svm_model.joblib").is_file():
             dk.svm_model = load(dk.data_path / f"{dk.model_filename}_svm_model.joblib")
freqtrade/freqai/freqai_interface.py
@@ -83,6 +83,7 @@ class IFreqaiModel(ABC):
         self.CONV_WIDTH = self.freqai_info.get('conv_width', 1)
         if self.ft_params.get("inlier_metric_window", 0):
             self.CONV_WIDTH = self.ft_params.get("inlier_metric_window", 0) * 2
+        self.class_names: List[str] = []  # used in classification subclasses
         self.pair_it = 0
         self.pair_it_train = 0
         self.total_pairs = len(self.config.get("exchange", {}).get("pair_whitelist"))
@@ -571,8 +572,9 @@ class IFreqaiModel(ABC):
             file_type = ".joblib"
         elif self.dd.model_type == 'keras':
             file_type = ".h5"
-        elif 'stable_baselines' in self.dd.model_type or 'sb3_contrib' == self.dd.model_type:
+        elif self.dd.model_type in ["stable_baselines3", "sb3_contrib", "pytorch"]:
             file_type = ".zip"

         path_to_modelfile = Path(dk.data_path / f"{dk.model_filename}_model{file_type}")
         file_exists = path_to_modelfile.is_file()
         if file_exists:
freqtrade/freqai/prediction_models/CatboostClassifier.py
@@ -14,16 +14,20 @@ logger = logging.getLogger(__name__)

 class CatboostClassifier(BaseClassifierModel):
     """
-    User created prediction model. The class needs to override three necessary
-    functions, predict(), train(), fit(). The class inherits ModelHandler which
-    has its own DataHandler where data is held, saved, loaded, and managed.
+    User created prediction model. The class inherits IFreqaiModel, which
+    means it has full access to all Frequency AI functionality. Typically,
+    users would use this to override the common `fit()`, `train()`, or
+    `predict()` methods to add their custom data handling tools or change
+    various aspects of the training that cannot be configured via the
+    top level config.json file.
     """

     def fit(self, data_dictionary: Dict, dk: FreqaiDataKitchen, **kwargs) -> Any:
         """
         User sets up the training and test data to fit their desired model here
-        :param data_dictionary: the dictionary constructed by DataHandler to hold
-        all the training and test data/labels.
+        :param data_dictionary: the dictionary holding all data for train, test,
+        labels, weights
+        :param dk: The datakitchen object for the current coin/model
         """

         train_data = Pool(
freqtrade/freqai/prediction_models/CatboostClassifierMultiTarget.py
@@ -15,16 +15,20 @@ logger = logging.getLogger(__name__)

 class CatboostClassifierMultiTarget(BaseClassifierModel):
     """
-    User created prediction model. The class needs to override three necessary
-    functions, predict(), train(), fit(). The class inherits ModelHandler which
-    has its own DataHandler where data is held, saved, loaded, and managed.
+    User created prediction model. The class inherits IFreqaiModel, which
+    means it has full access to all Frequency AI functionality. Typically,
+    users would use this to override the common `fit()`, `train()`, or
+    `predict()` methods to add their custom data handling tools or change
+    various aspects of the training that cannot be configured via the
+    top level config.json file.
     """

     def fit(self, data_dictionary: Dict, dk: FreqaiDataKitchen, **kwargs) -> Any:
         """
         User sets up the training and test data to fit their desired model here
-        :param data_dictionary: the dictionary constructed by DataHandler to hold
-        all the training and test data/labels.
+        :param data_dictionary: the dictionary holding all data for train, test,
+        labels, weights
+        :param dk: The datakitchen object for the current coin/model
         """

         cbc = CatBoostClassifier(
freqtrade/freqai/prediction_models/CatboostRegressor.py
@@ -14,16 +14,20 @@ logger = logging.getLogger(__name__)

 class CatboostRegressor(BaseRegressionModel):
     """
-    User created prediction model. The class needs to override three necessary
-    functions, predict(), train(), fit(). The class inherits ModelHandler which
-    has its own DataHandler where data is held, saved, loaded, and managed.
+    User created prediction model. The class inherits IFreqaiModel, which
+    means it has full access to all Frequency AI functionality. Typically,
+    users would use this to override the common `fit()`, `train()`, or
+    `predict()` methods to add their custom data handling tools or change
+    various aspects of the training that cannot be configured via the
+    top level config.json file.
     """

     def fit(self, data_dictionary: Dict, dk: FreqaiDataKitchen, **kwargs) -> Any:
         """
         User sets up the training and test data to fit their desired model here
-        :param data_dictionary: the dictionary constructed by DataHandler to hold
-        all the training and test data/labels.
+        :param data_dictionary: the dictionary holding all data for train, test,
+        labels, weights
+        :param dk: The datakitchen object for the current coin/model
         """

         train_data = Pool(
freqtrade/freqai/prediction_models/CatboostRegressorMultiTarget.py
@@ -15,16 +15,20 @@ logger = logging.getLogger(__name__)

 class CatboostRegressorMultiTarget(BaseRegressionModel):
     """
-    User created prediction model. The class needs to override three necessary
-    functions, predict(), train(), fit(). The class inherits ModelHandler which
-    has its own DataHandler where data is held, saved, loaded, and managed.
+    User created prediction model. The class inherits IFreqaiModel, which
+    means it has full access to all Frequency AI functionality. Typically,
+    users would use this to override the common `fit()`, `train()`, or
+    `predict()` methods to add their custom data handling tools or change
+    various aspects of the training that cannot be configured via the
+    top level config.json file.
     """

     def fit(self, data_dictionary: Dict, dk: FreqaiDataKitchen, **kwargs) -> Any:
         """
         User sets up the training and test data to fit their desired model here
-        :param data_dictionary: the dictionary constructed by DataHandler to hold
-        all the training and test data/labels.
+        :param data_dictionary: the dictionary holding all data for train, test,
+        labels, weights
+        :param dk: The datakitchen object for the current coin/model
         """

         cbr = CatBoostRegressor(
freqtrade/freqai/prediction_models/LightGBMClassifier.py
@@ -12,16 +12,20 @@ logger = logging.getLogger(__name__)

 class LightGBMClassifier(BaseClassifierModel):
     """
-    User created prediction model. The class needs to override three necessary
-    functions, predict(), train(), fit(). The class inherits ModelHandler which
-    has its own DataHandler where data is held, saved, loaded, and managed.
+    User created prediction model. The class inherits IFreqaiModel, which
+    means it has full access to all Frequency AI functionality. Typically,
+    users would use this to override the common `fit()`, `train()`, or
+    `predict()` methods to add their custom data handling tools or change
+    various aspects of the training that cannot be configured via the
+    top level config.json file.
     """

     def fit(self, data_dictionary: Dict, dk: FreqaiDataKitchen, **kwargs) -> Any:
         """
         User sets up the training and test data to fit their desired model here
-        :param data_dictionary: the dictionary constructed by DataHandler to hold
-        all the training and test data/labels.
+        :param data_dictionary: the dictionary holding all data for train, test,
+        labels, weights
+        :param dk: The datakitchen object for the current coin/model
         """

         if self.freqai_info.get('data_split_parameters', {}).get('test_size', 0.1) == 0:
freqtrade/freqai/prediction_models/LightGBMClassifierMultiTarget.py
@@ -13,16 +13,20 @@ logger = logging.getLogger(__name__)

 class LightGBMClassifierMultiTarget(BaseClassifierModel):
     """
-    User created prediction model. The class needs to override three necessary
-    functions, predict(), train(), fit(). The class inherits ModelHandler which
-    has its own DataHandler where data is held, saved, loaded, and managed.
+    User created prediction model. The class inherits IFreqaiModel, which
+    means it has full access to all Frequency AI functionality. Typically,
+    users would use this to override the common `fit()`, `train()`, or
+    `predict()` methods to add their custom data handling tools or change
+    various aspects of the training that cannot be configured via the
+    top level config.json file.
     """

     def fit(self, data_dictionary: Dict, dk: FreqaiDataKitchen, **kwargs) -> Any:
         """
         User sets up the training and test data to fit their desired model here
-        :param data_dictionary: the dictionary constructed by DataHandler to hold
-        all the training and test data/labels.
+        :param data_dictionary: the dictionary holding all data for train, test,
+        labels, weights
+        :param dk: The datakitchen object for the current coin/model
         """

         lgb = LGBMClassifier(**self.model_training_parameters)
freqtrade/freqai/prediction_models/LightGBMRegressor.py
@@ -12,18 +12,20 @@ logger = logging.getLogger(__name__)

 class LightGBMRegressor(BaseRegressionModel):
     """
-    User created prediction model. The class needs to override three necessary
-    functions, predict(), train(), fit(). The class inherits ModelHandler which
-    has its own DataHandler where data is held, saved, loaded, and managed.
+    User created prediction model. The class inherits IFreqaiModel, which
+    means it has full access to all Frequency AI functionality. Typically,
+    users would use this to override the common `fit()`, `train()`, or
+    `predict()` methods to add their custom data handling tools or change
+    various aspects of the training that cannot be configured via the
+    top level config.json file.
     """

     def fit(self, data_dictionary: Dict, dk: FreqaiDataKitchen, **kwargs) -> Any:
         """
-        Most regressors use the same function names and arguments e.g. user
-        can drop in LGBMRegressor in place of CatBoostRegressor and all data
-        management will be properly handled by Freqai.
-        :param data_dictionary: the dictionary constructed by DataHandler to hold
-        all the training and test data/labels.
+        User sets up the training and test data to fit their desired model here
+        :param data_dictionary: the dictionary holding all data for train, test,
+        labels, weights
+        :param dk: The datakitchen object for the current coin/model
         """

         if self.freqai_info.get('data_split_parameters', {}).get('test_size', 0.1) == 0:
freqtrade/freqai/prediction_models/LightGBMRegressorMultiTarget.py
@@ -13,16 +13,20 @@ logger = logging.getLogger(__name__)

 class LightGBMRegressorMultiTarget(BaseRegressionModel):
     """
-    User created prediction model. The class needs to override three necessary
-    functions, predict(), train(), fit(). The class inherits ModelHandler which
-    has its own DataHandler where data is held, saved, loaded, and managed.
+    User created prediction model. The class inherits IFreqaiModel, which
+    means it has full access to all Frequency AI functionality. Typically,
+    users would use this to override the common `fit()`, `train()`, or
+    `predict()` methods to add their custom data handling tools or change
+    various aspects of the training that cannot be configured via the
+    top level config.json file.
     """

     def fit(self, data_dictionary: Dict, dk: FreqaiDataKitchen, **kwargs) -> Any:
         """
         User sets up the training and test data to fit their desired model here
-        :param data_dictionary: the dictionary constructed by DataHandler to hold
-        all the training and test data/labels.
+        :param data_dictionary: the dictionary holding all data for train, test,
+        labels, weights
+        :param dk: The datakitchen object for the current coin/model
         """

         lgb = LGBMRegressor(**self.model_training_parameters)
freqtrade/freqai/prediction_models/PyTorchMLPClassifier.py (new file, 89 lines)
@@ -0,0 +1,89 @@
from typing import Any, Dict

import torch

from freqtrade.freqai.base_models.BasePyTorchClassifier import BasePyTorchClassifier
from freqtrade.freqai.data_kitchen import FreqaiDataKitchen
from freqtrade.freqai.torch.PyTorchDataConvertor import (DefaultPyTorchDataConvertor,
                                                         PyTorchDataConvertor)
from freqtrade.freqai.torch.PyTorchMLPModel import PyTorchMLPModel
from freqtrade.freqai.torch.PyTorchModelTrainer import PyTorchModelTrainer


class PyTorchMLPClassifier(BasePyTorchClassifier):
    """
    This class implements the fit method of IFreqaiModel.
    in the fit method we initialize the model and trainer objects.
    the only requirement from the model is to be aligned to PyTorchClassifier
    predict method that expects the model to predict a tensor of type long.

    parameters are passed via `model_training_parameters` under the freqai
    section in the config file. e.g:
    {
        ...
        "freqai": {
            ...
            "model_training_parameters" : {
                "learning_rate": 3e-4,
                "trainer_kwargs": {
                    "max_iters": 5000,
                    "batch_size": 64,
                    "max_n_eval_batches": null,
                },
                "model_kwargs": {
                    "hidden_dim": 512,
                    "dropout_percent": 0.2,
                    "n_layer": 1,
                },
            }
        }
    }
    """

    @property
    def data_convertor(self) -> PyTorchDataConvertor:
        return DefaultPyTorchDataConvertor(
            target_tensor_type=torch.long,
            squeeze_target_tensor=True
        )

    def __init__(self, **kwargs) -> None:
        super().__init__(**kwargs)
        config = self.freqai_info.get("model_training_parameters", {})
        self.learning_rate: float = config.get("learning_rate", 3e-4)
        self.model_kwargs: Dict[str, Any] = config.get("model_kwargs", {})
        self.trainer_kwargs: Dict[str, Any] = config.get("trainer_kwargs", {})

    def fit(self, data_dictionary: Dict, dk: FreqaiDataKitchen, **kwargs) -> Any:
        """
        User sets up the training and test data to fit their desired model here
        :param data_dictionary: the dictionary holding all data for train, test,
        labels, weights
        :param dk: The datakitchen object for the current coin/model
        :raises ValueError: If self.class_names is not defined in the parent class.
        """

        class_names = self.get_class_names()
        self.convert_label_column_to_int(data_dictionary, dk, class_names)
        n_features = data_dictionary["train_features"].shape[-1]
        model = PyTorchMLPModel(
            input_dim=n_features,
            output_dim=len(class_names),
            **self.model_kwargs
        )
        model.to(self.device)
        optimizer = torch.optim.AdamW(model.parameters(), lr=self.learning_rate)
        criterion = torch.nn.CrossEntropyLoss()
        init_model = self.get_init_model(dk.pair)
        trainer = PyTorchModelTrainer(
            model=model,
            optimizer=optimizer,
            criterion=criterion,
            model_meta_data={"class_names": class_names},
            device=self.device,
            init_model=init_model,
            data_convertor=self.data_convertor,
            **self.trainer_kwargs,
        )
        trainer.fit(data_dictionary, self.splits)
        return trainer
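Reading the docstring's JSON back as Python (a sketch; the values are just the docstring's own examples, and JSON `null` maps to Python `None`):

```python
# Sketch of the dict PyTorchMLPClassifier.__init__ reads from the freqai config.
model_training_parameters = {
    "learning_rate": 3e-4,
    "trainer_kwargs": {
        "max_iters": 5000,           # optimizer steps; the trainer converts this to epochs
        "batch_size": 64,
        "max_n_eval_batches": None,  # JSON null -> None: no cap on evaluation batches
    },
    "model_kwargs": {
        "hidden_dim": 512,
        "dropout_percent": 0.2,
        "n_layer": 1,
    },
}
```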
freqtrade/freqai/prediction_models/PyTorchMLPRegressor.py (new file, 83 lines)
@@ -0,0 +1,83 @@
from typing import Any, Dict

import torch

from freqtrade.freqai.base_models.BasePyTorchRegressor import BasePyTorchRegressor
from freqtrade.freqai.data_kitchen import FreqaiDataKitchen
from freqtrade.freqai.torch.PyTorchDataConvertor import (DefaultPyTorchDataConvertor,
                                                         PyTorchDataConvertor)
from freqtrade.freqai.torch.PyTorchMLPModel import PyTorchMLPModel
from freqtrade.freqai.torch.PyTorchModelTrainer import PyTorchModelTrainer


class PyTorchMLPRegressor(BasePyTorchRegressor):
    """
    This class implements the fit method of IFreqaiModel.
    in the fit method we initialize the model and trainer objects.
    the only requirement from the model is to be aligned to PyTorchRegressor
    predict method that expects the model to predict tensor of type float.
    the trainer defines the training loop.

    parameters are passed via `model_training_parameters` under the freqai
    section in the config file. e.g:
    {
        ...
        "freqai": {
            ...
            "model_training_parameters" : {
                "learning_rate": 3e-4,
                "trainer_kwargs": {
                    "max_iters": 5000,
                    "batch_size": 64,
                    "max_n_eval_batches": null,
                },
                "model_kwargs": {
                    "hidden_dim": 512,
                    "dropout_percent": 0.2,
                    "n_layer": 1,
                },
            }
        }
    }
    """

    @property
    def data_convertor(self) -> PyTorchDataConvertor:
        return DefaultPyTorchDataConvertor(target_tensor_type=torch.float)

    def __init__(self, **kwargs) -> None:
        super().__init__(**kwargs)
        config = self.freqai_info.get("model_training_parameters", {})
        self.learning_rate: float = config.get("learning_rate", 3e-4)
        self.model_kwargs: Dict[str, Any] = config.get("model_kwargs", {})
        self.trainer_kwargs: Dict[str, Any] = config.get("trainer_kwargs", {})

    def fit(self, data_dictionary: Dict, dk: FreqaiDataKitchen, **kwargs) -> Any:
        """
        User sets up the training and test data to fit their desired model here
        :param data_dictionary: the dictionary holding all data for train, test,
        labels, weights
        :param dk: The datakitchen object for the current coin/model
        """

        n_features = data_dictionary["train_features"].shape[-1]
        model = PyTorchMLPModel(
            input_dim=n_features,
            output_dim=1,
            **self.model_kwargs
        )
        model.to(self.device)
        optimizer = torch.optim.AdamW(model.parameters(), lr=self.learning_rate)
        criterion = torch.nn.MSELoss()
        init_model = self.get_init_model(dk.pair)
        trainer = PyTorchModelTrainer(
            model=model,
            optimizer=optimizer,
            criterion=criterion,
            device=self.device,
            init_model=init_model,
            data_convertor=self.data_convertor,
            **self.trainer_kwargs,
        )
        trainer.fit(data_dictionary, self.splits)
        return trainer
freqtrade/freqai/prediction_models/XGBoostClassifier.py
@@ -18,16 +18,20 @@ logger = logging.getLogger(__name__)

 class XGBoostClassifier(BaseClassifierModel):
     """
-    User created prediction model. The class needs to override three necessary
-    functions, predict(), train(), fit(). The class inherits ModelHandler which
-    has its own DataHandler where data is held, saved, loaded, and managed.
+    User created prediction model. The class inherits IFreqaiModel, which
+    means it has full access to all Frequency AI functionality. Typically,
+    users would use this to override the common `fit()`, `train()`, or
+    `predict()` methods to add their custom data handling tools or change
+    various aspects of the training that cannot be configured via the
+    top level config.json file.
     """

     def fit(self, data_dictionary: Dict, dk: FreqaiDataKitchen, **kwargs) -> Any:
         """
         User sets up the training and test data to fit their desired model here
-        :param data_dictionary: the dictionary constructed by DataHandler to hold
-        all the training and test data/labels.
+        :param data_dictionary: the dictionary holding all data for train, test,
+        labels, weights
+        :param dk: The datakitchen object for the current coin/model
         """

         X = data_dictionary["train_features"].to_numpy()
freqtrade/freqai/prediction_models/XGBoostRFClassifier.py
@@ -18,16 +18,20 @@ logger = logging.getLogger(__name__)

 class XGBoostRFClassifier(BaseClassifierModel):
     """
-    User created prediction model. The class needs to override three necessary
-    functions, predict(), train(), fit(). The class inherits ModelHandler which
-    has its own DataHandler where data is held, saved, loaded, and managed.
+    User created prediction model. The class inherits IFreqaiModel, which
+    means it has full access to all Frequency AI functionality. Typically,
+    users would use this to override the common `fit()`, `train()`, or
+    `predict()` methods to add their custom data handling tools or change
+    various aspects of the training that cannot be configured via the
+    top level config.json file.
     """

     def fit(self, data_dictionary: Dict, dk: FreqaiDataKitchen, **kwargs) -> Any:
         """
         User sets up the training and test data to fit their desired model here
-        :param data_dictionary: the dictionary constructed by DataHandler to hold
-        all the training and test data/labels.
+        :param data_dictionary: the dictionary holding all data for train, test,
+        labels, weights
+        :param dk: The datakitchen object for the current coin/model
         """

         X = data_dictionary["train_features"].to_numpy()
freqtrade/freqai/prediction_models/XGBoostRFRegressor.py
@@ -12,16 +12,20 @@ logger = logging.getLogger(__name__)

 class XGBoostRFRegressor(BaseRegressionModel):
     """
-    User created prediction model. The class needs to override three necessary
-    functions, predict(), train(), fit(). The class inherits ModelHandler which
-    has its own DataHandler where data is held, saved, loaded, and managed.
+    User created prediction model. The class inherits IFreqaiModel, which
+    means it has full access to all Frequency AI functionality. Typically,
+    users would use this to override the common `fit()`, `train()`, or
+    `predict()` methods to add their custom data handling tools or change
+    various aspects of the training that cannot be configured via the
+    top level config.json file.
     """

     def fit(self, data_dictionary: Dict, dk: FreqaiDataKitchen, **kwargs) -> Any:
         """
         User sets up the training and test data to fit their desired model here
-        :param data_dictionary: the dictionary constructed by DataHandler to hold
-        all the training and test data/labels.
+        :param data_dictionary: the dictionary holding all data for train, test,
+        labels, weights
+        :param dk: The datakitchen object for the current coin/model
         """

         X = data_dictionary["train_features"]
freqtrade/freqai/prediction_models/XGBoostRegressor.py
@@ -12,16 +12,20 @@ logger = logging.getLogger(__name__)

 class XGBoostRegressor(BaseRegressionModel):
     """
-    User created prediction model. The class needs to override three necessary
-    functions, predict(), train(), fit(). The class inherits ModelHandler which
-    has its own DataHandler where data is held, saved, loaded, and managed.
+    User created prediction model. The class inherits IFreqaiModel, which
+    means it has full access to all Frequency AI functionality. Typically,
+    users would use this to override the common `fit()`, `train()`, or
+    `predict()` methods to add their custom data handling tools or change
+    various aspects of the training that cannot be configured via the
+    top level config.json file.
     """

     def fit(self, data_dictionary: Dict, dk: FreqaiDataKitchen, **kwargs) -> Any:
         """
         User sets up the training and test data to fit their desired model here
-        :param data_dictionary: the dictionary constructed by DataHandler to hold
-        all the training and test data/labels.
+        :param data_dictionary: the dictionary holding all data for train, test,
+        labels, weights
+        :param dk: The datakitchen object for the current coin/model
         """

         X = data_dictionary["train_features"]
freqtrade/freqai/prediction_models/XGBoostRegressorMultiTarget.py
@@ -13,16 +13,20 @@ logger = logging.getLogger(__name__)

 class XGBoostRegressorMultiTarget(BaseRegressionModel):
     """
-    User created prediction model. The class needs to override three necessary
-    functions, predict(), train(), fit(). The class inherits ModelHandler which
-    has its own DataHandler where data is held, saved, loaded, and managed.
+    User created prediction model. The class inherits IFreqaiModel, which
+    means it has full access to all Frequency AI functionality. Typically,
+    users would use this to override the common `fit()`, `train()`, or
+    `predict()` methods to add their custom data handling tools or change
+    various aspects of the training that cannot be configured via the
+    top level config.json file.
     """

     def fit(self, data_dictionary: Dict, dk: FreqaiDataKitchen, **kwargs) -> Any:
         """
         User sets up the training and test data to fit their desired model here
-        :param data_dictionary: the dictionary constructed by DataHandler to hold
-        all the training and test data/labels.
+        :param data_dictionary: the dictionary holding all data for train, test,
+        labels, weights
+        :param dk: The datakitchen object for the current coin/model
         """

         xgb = XGBRegressor(**self.model_training_parameters)
freqtrade/freqai/torch/PyTorchDataConvertor.py (new file, 67 lines)
@@ -0,0 +1,67 @@
from abc import ABC, abstractmethod
from typing import List, Optional

import pandas as pd
import torch


class PyTorchDataConvertor(ABC):
    """
    This class is responsible for converting `*_features` & `*_labels` pandas dataframes
    to pytorch tensors.
    """

    @abstractmethod
    def convert_x(self, df: pd.DataFrame, device: Optional[str] = None) -> List[torch.Tensor]:
        """
        :param df: "*_features" dataframe.
        :param device: The device to use for training (e.g. 'cpu', 'cuda').
        """

    @abstractmethod
    def convert_y(self, df: pd.DataFrame, device: Optional[str] = None) -> List[torch.Tensor]:
        """
        :param df: "*_labels" dataframe.
        :param device: The device to use for training (e.g. 'cpu', 'cuda').
        """


class DefaultPyTorchDataConvertor(PyTorchDataConvertor):
    """
    A default conversion that keeps features dataframe shapes.
    """

    def __init__(
            self,
            target_tensor_type: Optional[torch.dtype] = None,
            squeeze_target_tensor: bool = False
    ):
        """
        :param target_tensor_type: type of target tensor, for classification use
            torch.long, for regressor use torch.float or torch.double.
        :param squeeze_target_tensor: controls the target shape, used for loss functions
            that requires 0D or 1D.
        """
        self._target_tensor_type = target_tensor_type
        self._squeeze_target_tensor = squeeze_target_tensor

    def convert_x(self, df: pd.DataFrame, device: Optional[str] = None) -> List[torch.Tensor]:
        x = torch.from_numpy(df.values).float()
        if device:
            x = x.to(device)

        return [x]

    def convert_y(self, df: pd.DataFrame, device: Optional[str] = None) -> List[torch.Tensor]:
        y = torch.from_numpy(df.values)

        if self._target_tensor_type:
            y = y.to(self._target_tensor_type)

        if self._squeeze_target_tensor:
            y = y.squeeze()

        if device:
            y = y.to(device)

        return [y]
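A quick round-trip sketch for the default convertor (illustrative; the toy frame stands in for a `*_labels` dataframe whose classes are already int-encoded):

```python
import pandas as pd
import torch

from freqtrade.freqai.torch.PyTorchDataConvertor import DefaultPyTorchDataConvertor

labels = pd.DataFrame({"&s-up_or_down": [0, 1, 1]})
convertor = DefaultPyTorchDataConvertor(target_tensor_type=torch.long,
                                        squeeze_target_tensor=True)
(y,) = convertor.convert_y(labels, device="cpu")
print(y.dtype, y.shape)  # torch.int64 torch.Size([3]) -- 1D, as CrossEntropyLoss expects
```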
freqtrade/freqai/torch/PyTorchMLPModel.py (new file, 97 lines)
@@ -0,0 +1,97 @@
import logging
from typing import List

import torch
import torch.nn as nn


logger = logging.getLogger(__name__)


class PyTorchMLPModel(nn.Module):
    """
    A multi-layer perceptron (MLP) model implemented using PyTorch.

    This class mainly serves as a simple example for the integration of PyTorch models
    into freqai. It is not optimized at all and should not be used for production purposes.

    :param input_dim: The number of input features. This parameter specifies the number
        of features in the input data that the MLP will use to make predictions.
    :param output_dim: The number of output classes. This parameter specifies the number
        of classes that the MLP will predict.
    :param hidden_dim: The number of hidden units in each layer. This parameter controls
        the complexity of the MLP and determines how many nonlinear relationships the MLP
        can represent. Increasing the number of hidden units can increase the capacity of
        the MLP to model complex patterns, but it also increases the risk of overfitting
        the training data. Default: 256
    :param dropout_percent: The dropout rate for regularization. This parameter specifies
        the probability of dropping out a neuron during training to prevent overfitting.
        The dropout rate should be tuned carefully to balance between underfitting and
        overfitting. Default: 0.2
    :param n_layer: The number of layers in the MLP. This parameter specifies the number
        of layers in the MLP architecture. Adding more layers to the MLP can increase its
        capacity to model complex patterns, but it also increases the risk of overfitting
        the training data. Default: 1

    :returns: The output of the MLP, with shape (batch_size, output_dim)
    """

    def __init__(self, input_dim: int, output_dim: int, **kwargs):
        super().__init__()
        hidden_dim: int = kwargs.get("hidden_dim", 256)
        dropout_percent: float = kwargs.get("dropout_percent", 0.2)
        n_layer: int = kwargs.get("n_layer", 1)
        self.input_layer = nn.Linear(input_dim, hidden_dim)
        self.blocks = nn.Sequential(*[Block(hidden_dim, dropout_percent) for _ in range(n_layer)])
        self.output_layer = nn.Linear(hidden_dim, output_dim)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(p=dropout_percent)

    def forward(self, tensors: List[torch.Tensor]) -> torch.Tensor:
        x: torch.Tensor = tensors[0]
        x = self.relu(self.input_layer(x))
        x = self.dropout(x)
        x = self.blocks(x)
        x = self.output_layer(x)
        return x


class Block(nn.Module):
    """
    A building block for a multi-layer perceptron (MLP).

    :param hidden_dim: The number of hidden units in the feedforward network.
    :param dropout_percent: The dropout rate for regularization.

    :returns: torch.Tensor with shape (batch_size, hidden_dim)
    """

    def __init__(self, hidden_dim: int, dropout_percent: float):
        super().__init__()
        self.ff = FeedForward(hidden_dim)
        self.dropout = nn.Dropout(p=dropout_percent)
        self.ln = nn.LayerNorm(hidden_dim)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        x = self.ff(self.ln(x))
        x = self.dropout(x)
        return x


class FeedForward(nn.Module):
    """
    A simple fully-connected feedforward neural network block.

    :param hidden_dim: The number of hidden units in the block.
    :return: torch.Tensor with shape (batch_size, hidden_dim)
    """

    def __init__(self, hidden_dim: int):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(hidden_dim, hidden_dim),
            nn.ReLU(),
        )

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        return self.net(x)
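To make the expected tensor shapes concrete (a sketch; `forward` takes a list because the data convertor returns `List[torch.Tensor]`):

```python
import torch

from freqtrade.freqai.torch.PyTorchMLPModel import PyTorchMLPModel

model = PyTorchMLPModel(input_dim=10, output_dim=2, hidden_dim=256, n_layer=1)
logits = model([torch.randn(32, 10)])  # forward() reads tensors[0]
print(logits.shape)  # torch.Size([32, 2]) -> (batch_size, output_dim)
```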
freqtrade/freqai/torch/PyTorchModelTrainer.py (new file, 208 lines)
@@ -0,0 +1,208 @@
import logging
import math
from pathlib import Path
from typing import Any, Dict, List, Optional

import pandas as pd
import torch
from torch import nn
from torch.optim import Optimizer
from torch.utils.data import DataLoader, TensorDataset

from freqtrade.freqai.torch.PyTorchDataConvertor import PyTorchDataConvertor
from freqtrade.freqai.torch.PyTorchTrainerInterface import PyTorchTrainerInterface


logger = logging.getLogger(__name__)


class PyTorchModelTrainer(PyTorchTrainerInterface):
    def __init__(
            self,
            model: nn.Module,
            optimizer: Optimizer,
            criterion: nn.Module,
            device: str,
            init_model: Dict,
            data_convertor: PyTorchDataConvertor,
            model_meta_data: Dict[str, Any] = {},
            **kwargs
    ):
        """
        :param model: The PyTorch model to be trained.
        :param optimizer: The optimizer to use for training.
        :param criterion: The loss function to use for training.
        :param device: The device to use for training (e.g. 'cpu', 'cuda').
        :param init_model: A dictionary containing the initial model/optimizer
            state_dict and model_meta_data saved by self.save() method.
        :param model_meta_data: Additional metadata about the model (optional).
        :param data_convertor: convertor from pd.DataFrame to torch.tensor.
        :param max_iters: The number of training iterations to run.
            iteration here refers to the number of times we call
            self.optimizer.step(). used to calculate n_epochs.
        :param batch_size: The size of the batches to use during training.
        :param max_n_eval_batches: The maximum number of batches to use for evaluation.
        """
        self.model = model
        self.optimizer = optimizer
        self.criterion = criterion
        self.model_meta_data = model_meta_data
        self.device = device
        self.max_iters: int = kwargs.get("max_iters", 100)
        self.batch_size: int = kwargs.get("batch_size", 64)
        self.max_n_eval_batches: Optional[int] = kwargs.get("max_n_eval_batches", None)
        self.data_convertor = data_convertor
        if init_model:
            self.load_from_checkpoint(init_model)

    def fit(self, data_dictionary: Dict[str, pd.DataFrame], splits: List[str]):
        """
        :param data_dictionary: the dictionary constructed by DataHandler to hold
        all the training and test data/labels.
        :param splits: splits to use in training, splits must contain "train",
        optional "test" could be added by setting freqai.data_split_parameters.test_size > 0
        in the config file.

        - Calculates the predicted output for the batch using the PyTorch model.
        - Calculates the loss between the predicted and actual output using a loss function.
        - Computes the gradients of the loss with respect to the model's parameters using
          backpropagation.
        - Updates the model's parameters using an optimizer.
        """
        data_loaders_dictionary = self.create_data_loaders_dictionary(data_dictionary, splits)
        epochs = self.calc_n_epochs(
            n_obs=len(data_dictionary["train_features"]),
            batch_size=self.batch_size,
            n_iters=self.max_iters
        )
        for epoch in range(1, epochs + 1):
            # training
            losses = []
            for i, batch_data in enumerate(data_loaders_dictionary["train"]):

                # Tensor.to() is not in-place; rebind so the batch actually
                # lands on the target device
                batch_data = [tensor.to(self.device) for tensor in batch_data]

                xb = batch_data[:-1]
                yb = batch_data[-1]
                yb_pred = self.model(xb)
                loss = self.criterion(yb_pred, yb)

                self.optimizer.zero_grad(set_to_none=True)
                loss.backward()
                self.optimizer.step()
                losses.append(loss.item())
            train_loss = sum(losses) / len(losses)
            log_message = f"epoch {epoch}/{epochs}: train loss {train_loss:.4f}"

            # evaluation
            if "test" in splits:
                test_loss = self.estimate_loss(
                    data_loaders_dictionary,
                    self.max_n_eval_batches,
                    "test"
                )
                log_message += f" ; test loss {test_loss:.4f}"

            logger.info(log_message)

    @torch.no_grad()
    def estimate_loss(
            self,
            data_loader_dictionary: Dict[str, DataLoader],
            max_n_eval_batches: Optional[int],
            split: str,
    ) -> float:
        self.model.eval()
        losses = []
        for i, batch_data in enumerate(data_loader_dictionary[split]):
            if max_n_eval_batches and i >= max_n_eval_batches:
                # stop once the evaluation budget is exhausted
                break

            # Tensor.to() is not in-place; rebind so the batch actually
            # lands on the target device
            batch_data = [tensor.to(self.device) for tensor in batch_data]

            xb = batch_data[:-1]
            yb = batch_data[-1]
            yb_pred = self.model(xb)
            loss = self.criterion(yb_pred, yb)
            losses.append(loss.item())

        self.model.train()
        return sum(losses) / len(losses)

    def create_data_loaders_dictionary(
            self,
            data_dictionary: Dict[str, pd.DataFrame],
            splits: List[str]
    ) -> Dict[str, DataLoader]:
        """
        Converts the input data to PyTorch tensors using a data loader.
        """
        data_loader_dictionary = {}
        for split in splits:
            x = self.data_convertor.convert_x(data_dictionary[f"{split}_features"])
            y = self.data_convertor.convert_y(data_dictionary[f"{split}_labels"])
            dataset = TensorDataset(*x, *y)
            data_loader = DataLoader(
                dataset,
                batch_size=self.batch_size,
                shuffle=True,
                drop_last=True,
                num_workers=0,
            )
            data_loader_dictionary[split] = data_loader

        return data_loader_dictionary

    @staticmethod
    def calc_n_epochs(n_obs: int, batch_size: int, n_iters: int) -> int:
        """
        Calculates the number of epochs required to reach the maximum number
        of iterations specified in the model training parameters.

        the motivation here is that `max_iters` is easier to optimize and keep stable,
        across different n_obs - the number of data points.
        """

        # ceil over true division; e.g. n_obs=10000, batch_size=64 -> 157 batches,
        # so max_iters=5000 -> ceil(5000 / 157) = 32 epochs
        n_batches = math.ceil(n_obs / batch_size)
        epochs = math.ceil(n_iters / n_batches)
        if epochs <= 10:
            logger.warning("User set `max_iters` in such a way that the trainer will only perform "
                           f"{epochs} epochs. Please consider increasing this value accordingly")
        if epochs <= 1:
            logger.warning("Epochs set to 1. Please review your `max_iters` value")
            epochs = 1
        return epochs

    def save(self, path: Path):
        """
        - Saving any nn.Module state_dict
        - Saving model_meta_data, this dict should contain any additional data that the
          user needs to store. e.g class_names for classification models.
        """

        torch.save({
            "model_state_dict": self.model.state_dict(),
            "optimizer_state_dict": self.optimizer.state_dict(),
            "model_meta_data": self.model_meta_data,
            "pytrainer": self
        }, path)

    def load(self, path: Path):
        checkpoint = torch.load(path)
        return self.load_from_checkpoint(checkpoint)

    def load_from_checkpoint(self, checkpoint: Dict):
        """
        when using continual_learning, DataDrawer will load the dictionary
        (containing state dicts and model_meta_data) by calling torch.load(path).
        you can access this dict from any class that inherits IFreqaiModel by calling
        get_init_model method.
        """
        self.model.load_state_dict(checkpoint["model_state_dict"])
        self.optimizer.load_state_dict(checkpoint["optimizer_state_dict"])
        self.model_meta_data = checkpoint["model_meta_data"]
        return self
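The `"pytrainer": self` entry in `save()` is what lets `FreqaiDataDrawer.load_data` (hunk earlier in this diff) rebuild everything from one file. A sketch of the round trip, assuming `trainer` is a fitted `PyTorchModelTrainer` and `model_path` a hypothetical checkpoint path:

```python
from pathlib import Path
import torch

model_path = Path("user_data/models/example/some_pair_model.zip")  # hypothetical path

# `trainer` is assumed to be a fitted PyTorchModelTrainer
trainer.save(model_path)                              # writes state dicts + the trainer itself

checkpoint = torch.load(model_path)                   # plain dict
restored = checkpoint["pytrainer"]                    # the pickled PyTorchModelTrainer
restored = restored.load_from_checkpoint(checkpoint)  # re-applies model/optimizer state
```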
freqtrade/freqai/torch/PyTorchTrainerInterface.py (new file, 53 lines)
@@ -0,0 +1,53 @@
from abc import ABC, abstractmethod
from pathlib import Path
from typing import Dict, List

import pandas as pd
import torch
from torch import nn


class PyTorchTrainerInterface(ABC):

    @abstractmethod
    def fit(self, data_dictionary: Dict[str, pd.DataFrame], splits: List[str]) -> None:
        """
        :param data_dictionary: the dictionary constructed by DataHandler to hold
        all the training and test data/labels.
        :param splits: splits to use in training, splits must contain "train",
        optional "test" could be added by setting freqai.data_split_parameters.test_size > 0
        in the config file.

        - Calculates the predicted output for the batch using the PyTorch model.
        - Calculates the loss between the predicted and actual output using a loss function.
        - Computes the gradients of the loss with respect to the model's parameters using
          backpropagation.
        - Updates the model's parameters using an optimizer.
        """

    @abstractmethod
    def save(self, path: Path) -> None:
        """
        - Saving any nn.Module state_dict
        - Saving model_meta_data, this dict should contain any additional data that the
          user needs to store. e.g class_names for classification models.
        """

    def load(self, path: Path) -> nn.Module:
        """
        :param path: path to zip file.
        :returns: pytorch model.
        """
        checkpoint = torch.load(path)
        return self.load_from_checkpoint(checkpoint)

    @abstractmethod
    def load_from_checkpoint(self, checkpoint: Dict) -> nn.Module:
        """
        when using continual_learning, DataDrawer will load the dictionary
        (containing state dicts and model_meta_data) by calling torch.load(path).
        you can access this dict from any class that inherits IFreqaiModel by calling
        get_init_model method.
        :param checkpoint: dict containing the model & optimizer state dicts,
        model_meta_data, etc..
        """
freqtrade/freqai/torch/__init__.py (new file, empty)
freqtrade/templates/FreqaiExampleHybridStrategy.py
@@ -223,6 +223,7 @@ class FreqaiExampleHybridStrategy(IStrategy):
         :param metadata: metadata of current pair
         usage example: dataframe["&-target"] = dataframe["close"].shift(-1) / dataframe["close"]
         """
+        self.freqai.class_names = ["down", "up"]
         dataframe['&s-up_or_down'] = np.where(dataframe["close"].shift(-50) >
                                               dataframe["close"], 'up', 'down')
