Merge pull request #8297 from Yinon-Polak/feat/add-pytorch-model-support

Feat/add pytorch model support
This commit is contained in:
Robert Caulk 2023-04-11 15:40:12 +02:00 committed by GitHub
commit 4ab047dfa7
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
34 changed files with 1248 additions and 86 deletions

View File

@ -12,6 +12,7 @@ TAG=$(echo "${BRANCH_NAME}" | sed -e "s/\//_/g")
TAG_PLOT=${TAG}_plot
TAG_FREQAI=${TAG}_freqai
TAG_FREQAI_RL=${TAG_FREQAI}rl
TAG_FREQAI_TORCH=${TAG_FREQAI}torch
TAG_PI="${TAG}_pi"
TAG_ARM=${TAG}_arm
@ -84,6 +85,10 @@ docker manifest push -p ${IMAGE_NAME}:${TAG_FREQAI}
docker manifest create ${IMAGE_NAME}:${TAG_FREQAI_RL} ${CACHE_IMAGE}:${TAG_FREQAI_RL} ${CACHE_IMAGE}:${TAG_FREQAI_RL_ARM}
docker manifest push -p ${IMAGE_NAME}:${TAG_FREQAI_RL}
# Create special Torch tag - which is identical to the RL tag.
docker manifest create ${IMAGE_NAME}:${TAG_FREQAI_TORCH} ${CACHE_IMAGE}:${TAG_FREQAI_RL} ${CACHE_IMAGE}:${TAG_FREQAI_RL_ARM}
docker manifest push -p ${IMAGE_NAME}:${TAG_FREQAI_TORCH}
# copy images to ghcr.io
alias crane="docker run --rm -i -v $(pwd)/.crane:/home/nonroot/.docker/ gcr.io/go-containerregistry/crane"
@ -93,6 +98,7 @@ chmod a+rwx .crane
echo "${GHCR_TOKEN}" | crane auth login ghcr.io -u "${GHCR_USERNAME}" --password-stdin
crane copy ${IMAGE_NAME}:${TAG_FREQAI_RL} ${GHCR_IMAGE_NAME}:${TAG_FREQAI_RL}
crane copy ${IMAGE_NAME}:${TAG_FREQAI_RL} ${GHCR_IMAGE_NAME}:${TAG_FREQAI_TORCH}
crane copy ${IMAGE_NAME}:${TAG_FREQAI} ${GHCR_IMAGE_NAME}:${TAG_FREQAI}
crane copy ${IMAGE_NAME}:${TAG_PLOT} ${GHCR_IMAGE_NAME}:${TAG_PLOT}
crane copy ${IMAGE_NAME}:${TAG} ${GHCR_IMAGE_NAME}:${TAG}

Binary file not shown.

After

Width:  |  Height:  |  Size: 18 KiB

View File

@ -236,3 +236,161 @@ If you want to predict multiple targets you must specify all labels in the same
df['&s-up_or_down'] = np.where( df["close"].shift(-100) > df["close"], 'up', 'down')
df['&s-up_or_down'] = np.where( df["close"].shift(-100) == df["close"], 'same', df['&s-up_or_down'])
```
## PyTorch Module
### Quick start
The easiest way to quickly run a pytorch model is with the following command (for regression task):
```bash
freqtrade trade --config config_examples/config_freqai.example.json --strategy FreqaiExampleStrategy --freqaimodel PyTorchMLPRegressor --strategy-path freqtrade/templates
```
!!! note "Installation/docker"
The PyTorch module requires large packages such as `torch`, which should be explicitly requested during `./setup.sh -i` by answering "y" to the question "Do you also want dependencies for freqai-rl or PyTorch (~700mb additional space required) [y/N]?".
Users who prefer docker should ensure they use the docker image appended with `_freqaitorch`.
### Structure
#### Model
You can construct your own Neural Network architecture in PyTorch by simply defining your `nn.Module` class inside your custom [`IFreqaiModel` file](#using-different-prediction-models) and then using that class in your `def train()` function. Here is an example of logistic regression model implementation using PyTorch (should be used with nn.BCELoss criterion) for classification tasks.
```python
class LogisticRegression(nn.Module):
def __init__(self, input_size: int):
super().__init__()
# Define your layers
self.linear = nn.Linear(input_size, 1)
self.activation = nn.Sigmoid()
def forward(self, x: torch.Tensor) -> torch.Tensor:
# Define the forward pass
out = self.linear(x)
out = self.activation(out)
return out
class MyCoolPyTorchClassifier(BasePyTorchClassifier):
"""
This is a custom IFreqaiModel showing how a user might setup their own
custom Neural Network architecture for their training.
"""
@property
def data_convertor(self) -> PyTorchDataConvertor:
return DefaultPyTorchDataConvertor(target_tensor_type=torch.float)
def __init__(self, **kwargs) -> None:
super().__init__(**kwargs)
config = self.freqai_info.get("model_training_parameters", {})
self.learning_rate: float = config.get("learning_rate", 3e-4)
self.model_kwargs: Dict[str, Any] = config.get("model_kwargs", {})
self.trainer_kwargs: Dict[str, Any] = config.get("trainer_kwargs", {})
def fit(self, data_dictionary: Dict, dk: FreqaiDataKitchen, **kwargs) -> Any:
"""
User sets up the training and test data to fit their desired model here
:param data_dictionary: the dictionary holding all data for train, test,
labels, weights
:param dk: The datakitchen object for the current coin/model
"""
class_names = self.get_class_names()
self.convert_label_column_to_int(data_dictionary, dk, class_names)
n_features = data_dictionary["train_features"].shape[-1]
model = LogisticRegression(
input_dim=n_features
)
model.to(self.device)
optimizer = torch.optim.AdamW(model.parameters(), lr=self.learning_rate)
criterion = torch.nn.CrossEntropyLoss()
init_model = self.get_init_model(dk.pair)
trainer = PyTorchModelTrainer(
model=model,
optimizer=optimizer,
criterion=criterion,
model_meta_data={"class_names": class_names},
device=self.device,
init_model=init_model,
data_convertor=self.data_convertor,
**self.trainer_kwargs,
)
trainer.fit(data_dictionary, self.splits)
return trainer
```
#### Trainer
The `PyTorchModelTrainer` performs the idiomatic PyTorch train loop:
Define our model, loss function, and optimizer, and then move them to the appropriate device (GPU or CPU). Inside the loop, we iterate through the batches in the dataloader, move the data to the device, compute the prediction and loss, backpropagate, and update the model parameters using the optimizer.
In addition, the trainer is responsible for the following:
- saving and loading the model
- converting the data from `pandas.DataFrame` to `torch.Tensor`.
#### Integration with Freqai module
Like all freqai models, PyTorch models inherit `IFreqaiModel`. `IFreqaiModel` declares three abstract methods: `train`, `fit`, and `predict`. we implement these methods in three levels of hierarchy.
From top to bottom:
1. `BasePyTorchModel` - Implements the `train` method. all `BasePyTorch*` inherit it. responsible for general data preparation (e.g., data normalization) and calling the `fit` method. Sets `device` attribute used by children classes. Sets `model_type` attribute used by the parent class.
2. `BasePyTorch*` - Implements the `predict` method. Here, the `*` represents a group of algorithms, such as classifiers or regressors. responsible for data preprocessing, predicting, and postprocessing if needed.
3. `PyTorch*Classifier` / `PyTorch*Regressor` - implements the `fit` method. responsible for the main train flaw, where we initialize the trainer and model objects.
![image](assets/freqai_pytorch-diagram.png)
#### Full example
Building a PyTorch regressor using MLP (multilayer perceptron) model, MSELoss criterion, and AdamW optimizer.
```python
class PyTorchMLPRegressor(BasePyTorchRegressor):
def __init__(self, **kwargs) -> None:
super().__init__(**kwargs)
config = self.freqai_info.get("model_training_parameters", {})
self.learning_rate: float = config.get("learning_rate", 3e-4)
self.model_kwargs: Dict[str, Any] = config.get("model_kwargs", {})
self.trainer_kwargs: Dict[str, Any] = config.get("trainer_kwargs", {})
def fit(self, data_dictionary: Dict, dk: FreqaiDataKitchen, **kwargs) -> Any:
n_features = data_dictionary["train_features"].shape[-1]
model = PyTorchMLPModel(
input_dim=n_features,
output_dim=1,
**self.model_kwargs
)
model.to(self.device)
optimizer = torch.optim.AdamW(model.parameters(), lr=self.learning_rate)
criterion = torch.nn.MSELoss()
init_model = self.get_init_model(dk.pair)
trainer = PyTorchModelTrainer(
model=model,
optimizer=optimizer,
criterion=criterion,
device=self.device,
init_model=init_model,
target_tensor_type=torch.float,
**self.trainer_kwargs,
)
trainer.fit(data_dictionary)
return trainer
```
Here we create a `PyTorchMLPRegressor` class that implements the `fit` method. The `fit` method specifies the training building blocks: model, optimizer, criterion, and trainer. We inherit both `BasePyTorchRegressor` and `BasePyTorchModel`, where the former implements the `predict` method that is suitable for our regression task, and the latter implements the train method.
??? Note "Setting Class Names for Classifiers"
When using classifiers, the user must declare the class names (or targets) by overriding the `IFreqaiModel.class_names` attribute. This is achieved by setting `self.freqai.class_names` in the FreqAI strategy inside the `set_freqai_targets` method.
For example, if you are using a binary classifier to predict price movements as up or down, you can set the class names as follows:
```python
def set_freqai_targets(self, dataframe: DataFrame, metadata: Dict, **kwargs):
self.freqai.class_names = ["down", "up"]
dataframe['&s-up_or_down'] = np.where(dataframe["close"].shift(-100) >
dataframe["close"], 'up', 'down')
return dataframe
```
To see a full example, you can refer to the [classifier test strategy class](https://github.com/freqtrade/freqtrade/blob/develop/tests/strategy/strats/freqai_test_classifier.py).

View File

@ -86,6 +86,27 @@ Mandatory parameters are marked as **Required** and have to be set in one of the
| `randomize_starting_position` | Randomize the starting point of each episode to avoid overfitting. <br> **Datatype:** bool. <br> Default: `False`.
| `drop_ohlc_from_features` | Do not include the normalized ohlc data in the feature set passed to the agent during training (ohlc will still be used for driving the environment in all cases) <br> **Datatype:** Boolean. <br> **Default:** `False`
### PyTorch parameters
#### general
| Parameter | Description |
|------------|-------------|
| | **Model training parameters within the `freqai.model_training_parameters` sub dictionary**
| `learning_rate` | Learning rate to be passed to the optimizer. <br> **Datatype:** float. <br> Default: `3e-4`.
| `model_kwargs` | Parameters to be passed to the model class. <br> **Datatype:** dict. <br> Default: `{}`.
| `trainer_kwargs` | Parameters to be passed to the trainer class. <br> **Datatype:** dict. <br> Default: `{}`.
#### trainer_kwargs
| Parameter | Description |
|------------|-------------|
| | **Model training parameters within the `freqai.model_training_parameters.model_kwargs` sub dictionary**
| `max_iters` | The number of training iterations to run. iteration here refers to the number of times we call self.optimizer.step(). used to calculate n_epochs. <br> **Datatype:** int. <br> Default: `100`.
| `batch_size` | The size of the batches to use during training.. <br> **Datatype:** int. <br> Default: `64`.
| `max_n_eval_batches` | The maximum number batches to use for evaluation.. <br> **Datatype:** int, optional. <br> Default: `None`.
### Additional parameters
| Parameter | Description |

View File

@ -0,0 +1,147 @@
import logging
from typing import Dict, List, Tuple
import numpy as np
import numpy.typing as npt
import pandas as pd
import torch
from pandas import DataFrame
from torch.nn import functional as F
from freqtrade.exceptions import OperationalException
from freqtrade.freqai.base_models.BasePyTorchModel import BasePyTorchModel
from freqtrade.freqai.data_kitchen import FreqaiDataKitchen
logger = logging.getLogger(__name__)
class BasePyTorchClassifier(BasePyTorchModel):
"""
A PyTorch implementation of a classifier.
User must implement fit method
Important!
- User must declare the target class names in the strategy,
under IStrategy.set_freqai_targets method.
for example, in your strategy:
```
def set_freqai_targets(self, dataframe: DataFrame, metadata: Dict, **kwargs):
self.freqai.class_names = ["down", "up"]
dataframe['&s-up_or_down'] = np.where(dataframe["close"].shift(-100) >
dataframe["close"], 'up', 'down')
return dataframe
"""
def __init__(self, **kwargs):
super().__init__(**kwargs)
self.class_name_to_index = None
self.index_to_class_name = None
def predict(
self, unfiltered_df: DataFrame, dk: FreqaiDataKitchen, **kwargs
) -> Tuple[DataFrame, npt.NDArray[np.int_]]:
"""
Filter the prediction features data and predict with it.
:param unfiltered_df: Full dataframe for the current backtest period.
:return:
:pred_df: dataframe containing the predictions
:do_predict: np.array of 1s and 0s to indicate places where freqai needed to remove
data (NaNs) or felt uncertain about data (PCA and DI index)
:raises ValueError: if 'class_names' doesn't exist in model meta_data.
"""
class_names = self.model.model_meta_data.get("class_names", None)
if not class_names:
raise ValueError(
"Missing class names. "
"self.model.model_meta_data['class_names'] is None."
)
if not self.class_name_to_index:
self.init_class_names_to_index_mapping(class_names)
dk.find_features(unfiltered_df)
filtered_df, _ = dk.filter_features(
unfiltered_df, dk.training_features_list, training_filter=False
)
filtered_df = dk.normalize_data_from_metadata(filtered_df)
dk.data_dictionary["prediction_features"] = filtered_df
self.data_cleaning_predict(dk)
x = self.data_convertor.convert_x(
dk.data_dictionary["prediction_features"],
device=self.device
)
logits = self.model.model(x)
probs = F.softmax(logits, dim=-1)
predicted_classes = torch.argmax(probs, dim=-1)
predicted_classes_str = self.decode_class_names(predicted_classes)
pred_df_prob = DataFrame(probs.detach().numpy(), columns=class_names)
pred_df = DataFrame(predicted_classes_str, columns=[dk.label_list[0]])
pred_df = pd.concat([pred_df, pred_df_prob], axis=1)
return (pred_df, dk.do_predict)
def encode_class_names(
self,
data_dictionary: Dict[str, pd.DataFrame],
dk: FreqaiDataKitchen,
class_names: List[str],
):
"""
encode class name, str -> int
assuming first column of *_labels data frame to be the target column
containing the class names
"""
target_column_name = dk.label_list[0]
for split in self.splits:
label_df = data_dictionary[f"{split}_labels"]
self.assert_valid_class_names(label_df[target_column_name], class_names)
label_df[target_column_name] = list(
map(lambda x: self.class_name_to_index[x], label_df[target_column_name])
)
@staticmethod
def assert_valid_class_names(
target_column: pd.Series,
class_names: List[str]
):
non_defined_labels = set(target_column) - set(class_names)
if len(non_defined_labels) != 0:
raise OperationalException(
f"Found non defined labels: {non_defined_labels}, ",
f"expecting labels: {class_names}"
)
def decode_class_names(self, class_ints: torch.Tensor) -> List[str]:
"""
decode class name, int -> str
"""
return list(map(lambda x: self.index_to_class_name[x.item()], class_ints))
def init_class_names_to_index_mapping(self, class_names):
self.class_name_to_index = {s: i for i, s in enumerate(class_names)}
self.index_to_class_name = {i: s for i, s in enumerate(class_names)}
logger.info(f"encoded class name to index: {self.class_name_to_index}")
def convert_label_column_to_int(
self,
data_dictionary: Dict[str, pd.DataFrame],
dk: FreqaiDataKitchen,
class_names: List[str]
):
self.init_class_names_to_index_mapping(class_names)
self.encode_class_names(data_dictionary, dk, class_names)
def get_class_names(self) -> List[str]:
if not self.class_names:
raise ValueError(
"self.class_names is empty, "
"set self.freqai.class_names = ['class a', 'class b', 'class c'] "
"inside IStrategy.set_freqai_targets method."
)
return self.class_names

View File

@ -0,0 +1,83 @@
import logging
from abc import ABC, abstractmethod
from time import time
from typing import Any
import torch
from pandas import DataFrame
from freqtrade.freqai.data_kitchen import FreqaiDataKitchen
from freqtrade.freqai.freqai_interface import IFreqaiModel
from freqtrade.freqai.torch.PyTorchDataConvertor import PyTorchDataConvertor
logger = logging.getLogger(__name__)
class BasePyTorchModel(IFreqaiModel, ABC):
"""
Base class for PyTorch type models.
User *must* inherit from this class and set fit() and predict() and
data_convertor property.
"""
def __init__(self, **kwargs):
super().__init__(config=kwargs["config"])
self.dd.model_type = "pytorch"
self.device = "cuda" if torch.cuda.is_available() else "cpu"
test_size = self.freqai_info.get('data_split_parameters', {}).get('test_size')
self.splits = ["train", "test"] if test_size != 0 else ["train"]
def train(
self, unfiltered_df: DataFrame, pair: str, dk: FreqaiDataKitchen, **kwargs
) -> Any:
"""
Filter the training data and train a model to it. Train makes heavy use of the datakitchen
for storing, saving, loading, and analyzing the data.
:param unfiltered_df: Full dataframe for the current training period
:return:
:model: Trained model which can be used to inference (self.predict)
"""
logger.info(f"-------------------- Starting training {pair} --------------------")
start_time = time()
features_filtered, labels_filtered = dk.filter_features(
unfiltered_df,
dk.training_features_list,
dk.label_list,
training_filter=True,
)
# split data into train/test data.
data_dictionary = dk.make_train_test_datasets(features_filtered, labels_filtered)
if not self.freqai_info.get("fit_live_predictions", 0) or not self.live:
dk.fit_labels()
# normalize all data based on train_dataset only
data_dictionary = dk.normalize_data(data_dictionary)
# optional additional data cleaning/analysis
self.data_cleaning_train(dk)
logger.info(
f"Training model on {len(dk.data_dictionary['train_features'].columns)} features"
)
logger.info(f"Training model on {len(data_dictionary['train_features'])} data points")
model = self.fit(data_dictionary, dk)
end_time = time()
logger.info(f"-------------------- Done training {pair} "
f"({end_time - start_time:.2f} secs) --------------------")
return model
@property
@abstractmethod
def data_convertor(self) -> PyTorchDataConvertor:
"""
a class responsible for converting `*_features` & `*_labels` pandas dataframes
to pytorch tensors.
"""
raise NotImplementedError("Abstract property")

View File

@ -0,0 +1,49 @@
import logging
from typing import Tuple
import numpy as np
import numpy.typing as npt
from pandas import DataFrame
from freqtrade.freqai.base_models.BasePyTorchModel import BasePyTorchModel
from freqtrade.freqai.data_kitchen import FreqaiDataKitchen
logger = logging.getLogger(__name__)
class BasePyTorchRegressor(BasePyTorchModel):
"""
A PyTorch implementation of a regressor.
User must implement fit method
"""
def __init__(self, **kwargs):
super().__init__(**kwargs)
def predict(
self, unfiltered_df: DataFrame, dk: FreqaiDataKitchen, **kwargs
) -> Tuple[DataFrame, npt.NDArray[np.int_]]:
"""
Filter the prediction features data and predict with it.
:param unfiltered_df: Full dataframe for the current backtest period.
:return:
:pred_df: dataframe containing the predictions
:do_predict: np.array of 1s and 0s to indicate places where freqai needed to remove
data (NaNs) or felt uncertain about data (PCA and DI index)
"""
dk.find_features(unfiltered_df)
filtered_df, _ = dk.filter_features(
unfiltered_df, dk.training_features_list, training_filter=False
)
filtered_df = dk.normalize_data_from_metadata(filtered_df)
dk.data_dictionary["prediction_features"] = filtered_df
self.data_cleaning_predict(dk)
x = self.data_convertor.convert_x(
dk.data_dictionary["prediction_features"],
device=self.device
)
y = self.model.model(x)
pred_df = DataFrame(y.detach().numpy(), columns=[dk.label_list[0]])
return (pred_df, dk.do_predict)

View File

@ -446,7 +446,7 @@ class FreqaiDataDrawer:
dump(model, save_path / f"{dk.model_filename}_model.joblib")
elif self.model_type == 'keras':
model.save(save_path / f"{dk.model_filename}_model.h5")
elif 'stable_baselines' in self.model_type or 'sb3_contrib' == self.model_type:
elif self.model_type in ["stable_baselines3", "sb3_contrib", "pytorch"]:
model.save(save_path / f"{dk.model_filename}_model.zip")
if dk.svm_model is not None:
@ -496,7 +496,7 @@ class FreqaiDataDrawer:
dk.training_features_list = dk.data["training_features_list"]
dk.label_list = dk.data["label_list"]
def load_data(self, coin: str, dk: FreqaiDataKitchen) -> Any:
def load_data(self, coin: str, dk: FreqaiDataKitchen) -> Any: # noqa: C901
"""
loads all data required to make a prediction on a sub-train time range
:returns:
@ -537,6 +537,11 @@ class FreqaiDataDrawer:
self.model_type, self.freqai_info['rl_config']['model_type'])
MODELCLASS = getattr(mod, self.freqai_info['rl_config']['model_type'])
model = MODELCLASS.load(dk.data_path / f"{dk.model_filename}_model")
elif self.model_type == 'pytorch':
import torch
zip = torch.load(dk.data_path / f"{dk.model_filename}_model.zip")
model = zip["pytrainer"]
model = model.load_from_checkpoint(zip)
if Path(dk.data_path / f"{dk.model_filename}_svm_model.joblib").is_file():
dk.svm_model = load(dk.data_path / f"{dk.model_filename}_svm_model.joblib")

View File

@ -83,6 +83,7 @@ class IFreqaiModel(ABC):
self.CONV_WIDTH = self.freqai_info.get('conv_width', 1)
if self.ft_params.get("inlier_metric_window", 0):
self.CONV_WIDTH = self.ft_params.get("inlier_metric_window", 0) * 2
self.class_names: List[str] = [] # used in classification subclasses
self.pair_it = 0
self.pair_it_train = 0
self.total_pairs = len(self.config.get("exchange", {}).get("pair_whitelist"))
@ -571,8 +572,9 @@ class IFreqaiModel(ABC):
file_type = ".joblib"
elif self.dd.model_type == 'keras':
file_type = ".h5"
elif 'stable_baselines' in self.dd.model_type or 'sb3_contrib' == self.dd.model_type:
elif self.dd.model_type in ["stable_baselines3", "sb3_contrib", "pytorch"]:
file_type = ".zip"
path_to_modelfile = Path(dk.data_path / f"{dk.model_filename}_model{file_type}")
file_exists = path_to_modelfile.is_file()
if file_exists:

View File

@ -14,16 +14,20 @@ logger = logging.getLogger(__name__)
class CatboostClassifier(BaseClassifierModel):
"""
User created prediction model. The class needs to override three necessary
functions, predict(), train(), fit(). The class inherits ModelHandler which
has its own DataHandler where data is held, saved, loaded, and managed.
User created prediction model. The class inherits IFreqaiModel, which
means it has full access to all Frequency AI functionality. Typically,
users would use this to override the common `fit()`, `train()`, or
`predict()` methods to add their custom data handling tools or change
various aspects of the training that cannot be configured via the
top level config.json file.
"""
def fit(self, data_dictionary: Dict, dk: FreqaiDataKitchen, **kwargs) -> Any:
"""
User sets up the training and test data to fit their desired model here
:param data_dictionary: the dictionary constructed by DataHandler to hold
all the training and test data/labels.
:param data_dictionary: the dictionary holding all data for train, test,
labels, weights
:param dk: The datakitchen object for the current coin/model
"""
train_data = Pool(

View File

@ -15,16 +15,20 @@ logger = logging.getLogger(__name__)
class CatboostClassifierMultiTarget(BaseClassifierModel):
"""
User created prediction model. The class needs to override three necessary
functions, predict(), train(), fit(). The class inherits ModelHandler which
has its own DataHandler where data is held, saved, loaded, and managed.
User created prediction model. The class inherits IFreqaiModel, which
means it has full access to all Frequency AI functionality. Typically,
users would use this to override the common `fit()`, `train()`, or
`predict()` methods to add their custom data handling tools or change
various aspects of the training that cannot be configured via the
top level config.json file.
"""
def fit(self, data_dictionary: Dict, dk: FreqaiDataKitchen, **kwargs) -> Any:
"""
User sets up the training and test data to fit their desired model here
:param data_dictionary: the dictionary constructed by DataHandler to hold
all the training and test data/labels.
:param data_dictionary: the dictionary holding all data for train, test,
labels, weights
:param dk: The datakitchen object for the current coin/model
"""
cbc = CatBoostClassifier(

View File

@ -14,16 +14,20 @@ logger = logging.getLogger(__name__)
class CatboostRegressor(BaseRegressionModel):
"""
User created prediction model. The class needs to override three necessary
functions, predict(), train(), fit(). The class inherits ModelHandler which
has its own DataHandler where data is held, saved, loaded, and managed.
User created prediction model. The class inherits IFreqaiModel, which
means it has full access to all Frequency AI functionality. Typically,
users would use this to override the common `fit()`, `train()`, or
`predict()` methods to add their custom data handling tools or change
various aspects of the training that cannot be configured via the
top level config.json file.
"""
def fit(self, data_dictionary: Dict, dk: FreqaiDataKitchen, **kwargs) -> Any:
"""
User sets up the training and test data to fit their desired model here
:param data_dictionary: the dictionary constructed by DataHandler to hold
all the training and test data/labels.
:param data_dictionary: the dictionary holding all data for train, test,
labels, weights
:param dk: The datakitchen object for the current coin/model
"""
train_data = Pool(

View File

@ -15,16 +15,20 @@ logger = logging.getLogger(__name__)
class CatboostRegressorMultiTarget(BaseRegressionModel):
"""
User created prediction model. The class needs to override three necessary
functions, predict(), train(), fit(). The class inherits ModelHandler which
has its own DataHandler where data is held, saved, loaded, and managed.
User created prediction model. The class inherits IFreqaiModel, which
means it has full access to all Frequency AI functionality. Typically,
users would use this to override the common `fit()`, `train()`, or
`predict()` methods to add their custom data handling tools or change
various aspects of the training that cannot be configured via the
top level config.json file.
"""
def fit(self, data_dictionary: Dict, dk: FreqaiDataKitchen, **kwargs) -> Any:
"""
User sets up the training and test data to fit their desired model here
:param data_dictionary: the dictionary constructed by DataHandler to hold
all the training and test data/labels.
:param data_dictionary: the dictionary holding all data for train, test,
labels, weights
:param dk: The datakitchen object for the current coin/model
"""
cbr = CatBoostRegressor(

View File

@ -12,16 +12,20 @@ logger = logging.getLogger(__name__)
class LightGBMClassifier(BaseClassifierModel):
"""
User created prediction model. The class needs to override three necessary
functions, predict(), train(), fit(). The class inherits ModelHandler which
has its own DataHandler where data is held, saved, loaded, and managed.
User created prediction model. The class inherits IFreqaiModel, which
means it has full access to all Frequency AI functionality. Typically,
users would use this to override the common `fit()`, `train()`, or
`predict()` methods to add their custom data handling tools or change
various aspects of the training that cannot be configured via the
top level config.json file.
"""
def fit(self, data_dictionary: Dict, dk: FreqaiDataKitchen, **kwargs) -> Any:
"""
User sets up the training and test data to fit their desired model here
:param data_dictionary: the dictionary constructed by DataHandler to hold
all the training and test data/labels.
:param data_dictionary: the dictionary holding all data for train, test,
labels, weights
:param dk: The datakitchen object for the current coin/model
"""
if self.freqai_info.get('data_split_parameters', {}).get('test_size', 0.1) == 0:

View File

@ -13,16 +13,20 @@ logger = logging.getLogger(__name__)
class LightGBMClassifierMultiTarget(BaseClassifierModel):
"""
User created prediction model. The class needs to override three necessary
functions, predict(), train(), fit(). The class inherits ModelHandler which
has its own DataHandler where data is held, saved, loaded, and managed.
User created prediction model. The class inherits IFreqaiModel, which
means it has full access to all Frequency AI functionality. Typically,
users would use this to override the common `fit()`, `train()`, or
`predict()` methods to add their custom data handling tools or change
various aspects of the training that cannot be configured via the
top level config.json file.
"""
def fit(self, data_dictionary: Dict, dk: FreqaiDataKitchen, **kwargs) -> Any:
"""
User sets up the training and test data to fit their desired model here
:param data_dictionary: the dictionary constructed by DataHandler to hold
all the training and test data/labels.
:param data_dictionary: the dictionary holding all data for train, test,
labels, weights
:param dk: The datakitchen object for the current coin/model
"""
lgb = LGBMClassifier(**self.model_training_parameters)

View File

@ -12,18 +12,20 @@ logger = logging.getLogger(__name__)
class LightGBMRegressor(BaseRegressionModel):
"""
User created prediction model. The class needs to override three necessary
functions, predict(), train(), fit(). The class inherits ModelHandler which
has its own DataHandler where data is held, saved, loaded, and managed.
User created prediction model. The class inherits IFreqaiModel, which
means it has full access to all Frequency AI functionality. Typically,
users would use this to override the common `fit()`, `train()`, or
`predict()` methods to add their custom data handling tools or change
various aspects of the training that cannot be configured via the
top level config.json file.
"""
def fit(self, data_dictionary: Dict, dk: FreqaiDataKitchen, **kwargs) -> Any:
"""
Most regressors use the same function names and arguments e.g. user
can drop in LGBMRegressor in place of CatBoostRegressor and all data
management will be properly handled by Freqai.
:param data_dictionary: the dictionary constructed by DataHandler to hold
all the training and test data/labels.
User sets up the training and test data to fit their desired model here
:param data_dictionary: the dictionary holding all data for train, test,
labels, weights
:param dk: The datakitchen object for the current coin/model
"""
if self.freqai_info.get('data_split_parameters', {}).get('test_size', 0.1) == 0:

View File

@ -13,16 +13,20 @@ logger = logging.getLogger(__name__)
class LightGBMRegressorMultiTarget(BaseRegressionModel):
"""
User created prediction model. The class needs to override three necessary
functions, predict(), train(), fit(). The class inherits ModelHandler which
has its own DataHandler where data is held, saved, loaded, and managed.
User created prediction model. The class inherits IFreqaiModel, which
means it has full access to all Frequency AI functionality. Typically,
users would use this to override the common `fit()`, `train()`, or
`predict()` methods to add their custom data handling tools or change
various aspects of the training that cannot be configured via the
top level config.json file.
"""
def fit(self, data_dictionary: Dict, dk: FreqaiDataKitchen, **kwargs) -> Any:
"""
User sets up the training and test data to fit their desired model here
:param data_dictionary: the dictionary constructed by DataHandler to hold
all the training and test data/labels.
:param data_dictionary: the dictionary holding all data for train, test,
labels, weights
:param dk: The datakitchen object for the current coin/model
"""
lgb = LGBMRegressor(**self.model_training_parameters)

View File

@ -0,0 +1,89 @@
from typing import Any, Dict
import torch
from freqtrade.freqai.base_models.BasePyTorchClassifier import BasePyTorchClassifier
from freqtrade.freqai.data_kitchen import FreqaiDataKitchen
from freqtrade.freqai.torch.PyTorchDataConvertor import (DefaultPyTorchDataConvertor,
PyTorchDataConvertor)
from freqtrade.freqai.torch.PyTorchMLPModel import PyTorchMLPModel
from freqtrade.freqai.torch.PyTorchModelTrainer import PyTorchModelTrainer
class PyTorchMLPClassifier(BasePyTorchClassifier):
"""
This class implements the fit method of IFreqaiModel.
in the fit method we initialize the model and trainer objects.
the only requirement from the model is to be aligned to PyTorchClassifier
predict method that expects the model to predict a tensor of type long.
parameters are passed via `model_training_parameters` under the freqai
section in the config file. e.g:
{
...
"freqai": {
...
"model_training_parameters" : {
"learning_rate": 3e-4,
"trainer_kwargs": {
"max_iters": 5000,
"batch_size": 64,
"max_n_eval_batches": null,
},
"model_kwargs": {
"hidden_dim": 512,
"dropout_percent": 0.2,
"n_layer": 1,
},
}
}
}
"""
@property
def data_convertor(self) -> PyTorchDataConvertor:
return DefaultPyTorchDataConvertor(
target_tensor_type=torch.long,
squeeze_target_tensor=True
)
def __init__(self, **kwargs) -> None:
super().__init__(**kwargs)
config = self.freqai_info.get("model_training_parameters", {})
self.learning_rate: float = config.get("learning_rate", 3e-4)
self.model_kwargs: Dict[str, Any] = config.get("model_kwargs", {})
self.trainer_kwargs: Dict[str, Any] = config.get("trainer_kwargs", {})
def fit(self, data_dictionary: Dict, dk: FreqaiDataKitchen, **kwargs) -> Any:
"""
User sets up the training and test data to fit their desired model here
:param data_dictionary: the dictionary holding all data for train, test,
labels, weights
:param dk: The datakitchen object for the current coin/model
:raises ValueError: If self.class_names is not defined in the parent class.
"""
class_names = self.get_class_names()
self.convert_label_column_to_int(data_dictionary, dk, class_names)
n_features = data_dictionary["train_features"].shape[-1]
model = PyTorchMLPModel(
input_dim=n_features,
output_dim=len(class_names),
**self.model_kwargs
)
model.to(self.device)
optimizer = torch.optim.AdamW(model.parameters(), lr=self.learning_rate)
criterion = torch.nn.CrossEntropyLoss()
init_model = self.get_init_model(dk.pair)
trainer = PyTorchModelTrainer(
model=model,
optimizer=optimizer,
criterion=criterion,
model_meta_data={"class_names": class_names},
device=self.device,
init_model=init_model,
data_convertor=self.data_convertor,
**self.trainer_kwargs,
)
trainer.fit(data_dictionary, self.splits)
return trainer

View File

@ -0,0 +1,83 @@
from typing import Any, Dict
import torch
from freqtrade.freqai.base_models.BasePyTorchRegressor import BasePyTorchRegressor
from freqtrade.freqai.data_kitchen import FreqaiDataKitchen
from freqtrade.freqai.torch.PyTorchDataConvertor import (DefaultPyTorchDataConvertor,
PyTorchDataConvertor)
from freqtrade.freqai.torch.PyTorchMLPModel import PyTorchMLPModel
from freqtrade.freqai.torch.PyTorchModelTrainer import PyTorchModelTrainer
class PyTorchMLPRegressor(BasePyTorchRegressor):
"""
This class implements the fit method of IFreqaiModel.
in the fit method we initialize the model and trainer objects.
the only requirement from the model is to be aligned to PyTorchRegressor
predict method that expects the model to predict tensor of type float.
the trainer defines the training loop.
parameters are passed via `model_training_parameters` under the freqai
section in the config file. e.g:
{
...
"freqai": {
...
"model_training_parameters" : {
"learning_rate": 3e-4,
"trainer_kwargs": {
"max_iters": 5000,
"batch_size": 64,
"max_n_eval_batches": null,
},
"model_kwargs": {
"hidden_dim": 512,
"dropout_percent": 0.2,
"n_layer": 1,
},
}
}
}
"""
@property
def data_convertor(self) -> PyTorchDataConvertor:
return DefaultPyTorchDataConvertor(target_tensor_type=torch.float)
def __init__(self, **kwargs) -> None:
super().__init__(**kwargs)
config = self.freqai_info.get("model_training_parameters", {})
self.learning_rate: float = config.get("learning_rate", 3e-4)
self.model_kwargs: Dict[str, Any] = config.get("model_kwargs", {})
self.trainer_kwargs: Dict[str, Any] = config.get("trainer_kwargs", {})
def fit(self, data_dictionary: Dict, dk: FreqaiDataKitchen, **kwargs) -> Any:
"""
User sets up the training and test data to fit their desired model here
:param data_dictionary: the dictionary holding all data for train, test,
labels, weights
:param dk: The datakitchen object for the current coin/model
"""
n_features = data_dictionary["train_features"].shape[-1]
model = PyTorchMLPModel(
input_dim=n_features,
output_dim=1,
**self.model_kwargs
)
model.to(self.device)
optimizer = torch.optim.AdamW(model.parameters(), lr=self.learning_rate)
criterion = torch.nn.MSELoss()
init_model = self.get_init_model(dk.pair)
trainer = PyTorchModelTrainer(
model=model,
optimizer=optimizer,
criterion=criterion,
device=self.device,
init_model=init_model,
data_convertor=self.data_convertor,
**self.trainer_kwargs,
)
trainer.fit(data_dictionary, self.splits)
return trainer

View File

@ -18,16 +18,20 @@ logger = logging.getLogger(__name__)
class XGBoostClassifier(BaseClassifierModel):
"""
User created prediction model. The class needs to override three necessary
functions, predict(), train(), fit(). The class inherits ModelHandler which
has its own DataHandler where data is held, saved, loaded, and managed.
User created prediction model. The class inherits IFreqaiModel, which
means it has full access to all Frequency AI functionality. Typically,
users would use this to override the common `fit()`, `train()`, or
`predict()` methods to add their custom data handling tools or change
various aspects of the training that cannot be configured via the
top level config.json file.
"""
def fit(self, data_dictionary: Dict, dk: FreqaiDataKitchen, **kwargs) -> Any:
"""
User sets up the training and test data to fit their desired model here
:param data_dictionary: the dictionary constructed by DataHandler to hold
all the training and test data/labels.
:param data_dictionary: the dictionary holding all data for train, test,
labels, weights
:param dk: The datakitchen object for the current coin/model
"""
X = data_dictionary["train_features"].to_numpy()

View File

@ -18,16 +18,20 @@ logger = logging.getLogger(__name__)
class XGBoostRFClassifier(BaseClassifierModel):
"""
User created prediction model. The class needs to override three necessary
functions, predict(), train(), fit(). The class inherits ModelHandler which
has its own DataHandler where data is held, saved, loaded, and managed.
User created prediction model. The class inherits IFreqaiModel, which
means it has full access to all Frequency AI functionality. Typically,
users would use this to override the common `fit()`, `train()`, or
`predict()` methods to add their custom data handling tools or change
various aspects of the training that cannot be configured via the
top level config.json file.
"""
def fit(self, data_dictionary: Dict, dk: FreqaiDataKitchen, **kwargs) -> Any:
"""
User sets up the training and test data to fit their desired model here
:param data_dictionary: the dictionary constructed by DataHandler to hold
all the training and test data/labels.
:param data_dictionary: the dictionary holding all data for train, test,
labels, weights
:param dk: The datakitchen object for the current coin/model
"""
X = data_dictionary["train_features"].to_numpy()

View File

@ -12,16 +12,20 @@ logger = logging.getLogger(__name__)
class XGBoostRFRegressor(BaseRegressionModel):
"""
User created prediction model. The class needs to override three necessary
functions, predict(), train(), fit(). The class inherits ModelHandler which
has its own DataHandler where data is held, saved, loaded, and managed.
User created prediction model. The class inherits IFreqaiModel, which
means it has full access to all Frequency AI functionality. Typically,
users would use this to override the common `fit()`, `train()`, or
`predict()` methods to add their custom data handling tools or change
various aspects of the training that cannot be configured via the
top level config.json file.
"""
def fit(self, data_dictionary: Dict, dk: FreqaiDataKitchen, **kwargs) -> Any:
"""
User sets up the training and test data to fit their desired model here
:param data_dictionary: the dictionary constructed by DataHandler to hold
all the training and test data/labels.
:param data_dictionary: the dictionary holding all data for train, test,
labels, weights
:param dk: The datakitchen object for the current coin/model
"""
X = data_dictionary["train_features"]

View File

@ -12,16 +12,20 @@ logger = logging.getLogger(__name__)
class XGBoostRegressor(BaseRegressionModel):
"""
User created prediction model. The class needs to override three necessary
functions, predict(), train(), fit(). The class inherits ModelHandler which
has its own DataHandler where data is held, saved, loaded, and managed.
User created prediction model. The class inherits IFreqaiModel, which
means it has full access to all Frequency AI functionality. Typically,
users would use this to override the common `fit()`, `train()`, or
`predict()` methods to add their custom data handling tools or change
various aspects of the training that cannot be configured via the
top level config.json file.
"""
def fit(self, data_dictionary: Dict, dk: FreqaiDataKitchen, **kwargs) -> Any:
"""
User sets up the training and test data to fit their desired model here
:param data_dictionary: the dictionary constructed by DataHandler to hold
all the training and test data/labels.
:param data_dictionary: the dictionary holding all data for train, test,
labels, weights
:param dk: The datakitchen object for the current coin/model
"""
X = data_dictionary["train_features"]

View File

@ -13,16 +13,20 @@ logger = logging.getLogger(__name__)
class XGBoostRegressorMultiTarget(BaseRegressionModel):
"""
User created prediction model. The class needs to override three necessary
functions, predict(), train(), fit(). The class inherits ModelHandler which
has its own DataHandler where data is held, saved, loaded, and managed.
User created prediction model. The class inherits IFreqaiModel, which
means it has full access to all Frequency AI functionality. Typically,
users would use this to override the common `fit()`, `train()`, or
`predict()` methods to add their custom data handling tools or change
various aspects of the training that cannot be configured via the
top level config.json file.
"""
def fit(self, data_dictionary: Dict, dk: FreqaiDataKitchen, **kwargs) -> Any:
"""
User sets up the training and test data to fit their desired model here
:param data_dictionary: the dictionary constructed by DataHandler to hold
all the training and test data/labels.
:param data_dictionary: the dictionary holding all data for train, test,
labels, weights
:param dk: The datakitchen object for the current coin/model
"""
xgb = XGBRegressor(**self.model_training_parameters)

View File

@ -0,0 +1,67 @@
from abc import ABC, abstractmethod
from typing import List, Optional
import pandas as pd
import torch
class PyTorchDataConvertor(ABC):
"""
This class is responsible for converting `*_features` & `*_labels` pandas dataframes
to pytorch tensors.
"""
@abstractmethod
def convert_x(self, df: pd.DataFrame, device: Optional[str] = None) -> List[torch.Tensor]:
"""
:param df: "*_features" dataframe.
:param device: The device to use for training (e.g. 'cpu', 'cuda').
"""
@abstractmethod
def convert_y(self, df: pd.DataFrame, device: Optional[str] = None) -> List[torch.Tensor]:
"""
:param df: "*_labels" dataframe.
:param device: The device to use for training (e.g. 'cpu', 'cuda').
"""
class DefaultPyTorchDataConvertor(PyTorchDataConvertor):
"""
A default conversion that keeps features dataframe shapes.
"""
def __init__(
self,
target_tensor_type: Optional[torch.dtype] = None,
squeeze_target_tensor: bool = False
):
"""
:param target_tensor_type: type of target tensor, for classification use
torch.long, for regressor use torch.float or torch.double.
:param squeeze_target_tensor: controls the target shape, used for loss functions
that requires 0D or 1D.
"""
self._target_tensor_type = target_tensor_type
self._squeeze_target_tensor = squeeze_target_tensor
def convert_x(self, df: pd.DataFrame, device: Optional[str] = None) -> List[torch.Tensor]:
x = torch.from_numpy(df.values).float()
if device:
x = x.to(device)
return [x]
def convert_y(self, df: pd.DataFrame, device: Optional[str] = None) -> List[torch.Tensor]:
y = torch.from_numpy(df.values)
if self._target_tensor_type:
y = y.to(self._target_tensor_type)
if self._squeeze_target_tensor:
y = y.squeeze()
if device:
y = y.to(device)
return [y]

View File

@ -0,0 +1,97 @@
import logging
from typing import List
import torch
import torch.nn as nn
logger = logging.getLogger(__name__)
class PyTorchMLPModel(nn.Module):
"""
A multi-layer perceptron (MLP) model implemented using PyTorch.
This class mainly serves as a simple example for the integration of PyTorch model's
to freqai. It is not optimized at all and should not be used for production purposes.
:param input_dim: The number of input features. This parameter specifies the number
of features in the input data that the MLP will use to make predictions.
:param output_dim: The number of output classes. This parameter specifies the number
of classes that the MLP will predict.
:param hidden_dim: The number of hidden units in each layer. This parameter controls
the complexity of the MLP and determines how many nonlinear relationships the MLP
can represent. Increasing the number of hidden units can increase the capacity of
the MLP to model complex patterns, but it also increases the risk of overfitting
the training data. Default: 256
:param dropout_percent: The dropout rate for regularization. This parameter specifies
the probability of dropping out a neuron during training to prevent overfitting.
The dropout rate should be tuned carefully to balance between underfitting and
overfitting. Default: 0.2
:param n_layer: The number of layers in the MLP. This parameter specifies the number
of layers in the MLP architecture. Adding more layers to the MLP can increase its
capacity to model complex patterns, but it also increases the risk of overfitting
the training data. Default: 1
:returns: The output of the MLP, with shape (batch_size, output_dim)
"""
def __init__(self, input_dim: int, output_dim: int, **kwargs):
super().__init__()
hidden_dim: int = kwargs.get("hidden_dim", 256)
dropout_percent: int = kwargs.get("dropout_percent", 0.2)
n_layer: int = kwargs.get("n_layer", 1)
self.input_layer = nn.Linear(input_dim, hidden_dim)
self.blocks = nn.Sequential(*[Block(hidden_dim, dropout_percent) for _ in range(n_layer)])
self.output_layer = nn.Linear(hidden_dim, output_dim)
self.relu = nn.ReLU()
self.dropout = nn.Dropout(p=dropout_percent)
def forward(self, tensors: List[torch.Tensor]) -> torch.Tensor:
x: torch.Tensor = tensors[0]
x = self.relu(self.input_layer(x))
x = self.dropout(x)
x = self.blocks(x)
x = self.output_layer(x)
return x
class Block(nn.Module):
"""
A building block for a multi-layer perceptron (MLP).
:param hidden_dim: The number of hidden units in the feedforward network.
:param dropout_percent: The dropout rate for regularization.
:returns: torch.Tensor. with shape (batch_size, hidden_dim)
"""
def __init__(self, hidden_dim: int, dropout_percent: int):
super().__init__()
self.ff = FeedForward(hidden_dim)
self.dropout = nn.Dropout(p=dropout_percent)
self.ln = nn.LayerNorm(hidden_dim)
def forward(self, x: torch.Tensor) -> torch.Tensor:
x = self.ff(self.ln(x))
x = self.dropout(x)
return x
class FeedForward(nn.Module):
"""
A simple fully-connected feedforward neural network block.
:param hidden_dim: The number of hidden units in the block.
:return: torch.Tensor. with shape (batch_size, hidden_dim)
"""
def __init__(self, hidden_dim: int):
super().__init__()
self.net = nn.Sequential(
nn.Linear(hidden_dim, hidden_dim),
nn.ReLU(),
)
def forward(self, x: torch.Tensor) -> torch.Tensor:
return self.net(x)

View File

@ -0,0 +1,208 @@
import logging
import math
from pathlib import Path
from typing import Any, Dict, List, Optional
import pandas as pd
import torch
from torch import nn
from torch.optim import Optimizer
from torch.utils.data import DataLoader, TensorDataset
from freqtrade.freqai.torch.PyTorchDataConvertor import PyTorchDataConvertor
from freqtrade.freqai.torch.PyTorchTrainerInterface import PyTorchTrainerInterface
logger = logging.getLogger(__name__)
class PyTorchModelTrainer(PyTorchTrainerInterface):
def __init__(
self,
model: nn.Module,
optimizer: Optimizer,
criterion: nn.Module,
device: str,
init_model: Dict,
data_convertor: PyTorchDataConvertor,
model_meta_data: Dict[str, Any] = {},
**kwargs
):
"""
:param model: The PyTorch model to be trained.
:param optimizer: The optimizer to use for training.
:param criterion: The loss function to use for training.
:param device: The device to use for training (e.g. 'cpu', 'cuda').
:param init_model: A dictionary containing the initial model/optimizer
state_dict and model_meta_data saved by self.save() method.
:param model_meta_data: Additional metadata about the model (optional).
:param data_convertor: convertor from pd.DataFrame to torch.tensor.
:param max_iters: The number of training iterations to run.
iteration here refers to the number of times we call
self.optimizer.step(). used to calculate n_epochs.
:param batch_size: The size of the batches to use during training.
:param max_n_eval_batches: The maximum number batches to use for evaluation.
"""
self.model = model
self.optimizer = optimizer
self.criterion = criterion
self.model_meta_data = model_meta_data
self.device = device
self.max_iters: int = kwargs.get("max_iters", 100)
self.batch_size: int = kwargs.get("batch_size", 64)
self.max_n_eval_batches: Optional[int] = kwargs.get("max_n_eval_batches", None)
self.data_convertor = data_convertor
if init_model:
self.load_from_checkpoint(init_model)
def fit(self, data_dictionary: Dict[str, pd.DataFrame], splits: List[str]):
"""
:param data_dictionary: the dictionary constructed by DataHandler to hold
all the training and test data/labels.
:param splits: splits to use in training, splits must contain "train",
optional "test" could be added by setting freqai.data_split_parameters.test_size > 0
in the config file.
- Calculates the predicted output for the batch using the PyTorch model.
- Calculates the loss between the predicted and actual output using a loss function.
- Computes the gradients of the loss with respect to the model's parameters using
backpropagation.
- Updates the model's parameters using an optimizer.
"""
data_loaders_dictionary = self.create_data_loaders_dictionary(data_dictionary, splits)
epochs = self.calc_n_epochs(
n_obs=len(data_dictionary["train_features"]),
batch_size=self.batch_size,
n_iters=self.max_iters
)
for epoch in range(1, epochs + 1):
# training
losses = []
for i, batch_data in enumerate(data_loaders_dictionary["train"]):
for tensor in batch_data:
tensor.to(self.device)
xb = batch_data[:-1]
yb = batch_data[-1]
yb_pred = self.model(xb)
loss = self.criterion(yb_pred, yb)
self.optimizer.zero_grad(set_to_none=True)
loss.backward()
self.optimizer.step()
losses.append(loss.item())
train_loss = sum(losses) / len(losses)
log_message = f"epoch {epoch}/{epochs}: train loss {train_loss:.4f}"
# evaluation
if "test" in splits:
test_loss = self.estimate_loss(
data_loaders_dictionary,
self.max_n_eval_batches,
"test"
)
log_message += f" ; test loss {test_loss:.4f}"
logger.info(log_message)
@torch.no_grad()
def estimate_loss(
self,
data_loader_dictionary: Dict[str, DataLoader],
max_n_eval_batches: Optional[int],
split: str,
) -> float:
self.model.eval()
n_batches = 0
losses = []
for i, batch_data in enumerate(data_loader_dictionary[split]):
if max_n_eval_batches and i > max_n_eval_batches:
n_batches += 1
break
for tensor in batch_data:
tensor.to(self.device)
xb = batch_data[:-1]
yb = batch_data[-1]
yb_pred = self.model(xb)
loss = self.criterion(yb_pred, yb)
losses.append(loss.item())
self.model.train()
return sum(losses) / len(losses)
def create_data_loaders_dictionary(
self,
data_dictionary: Dict[str, pd.DataFrame],
splits: List[str]
) -> Dict[str, DataLoader]:
"""
Converts the input data to PyTorch tensors using a data loader.
"""
data_loader_dictionary = {}
for split in splits:
x = self.data_convertor.convert_x(data_dictionary[f"{split}_features"])
y = self.data_convertor.convert_y(data_dictionary[f"{split}_labels"])
dataset = TensorDataset(*x, *y)
data_loader = DataLoader(
dataset,
batch_size=self.batch_size,
shuffle=True,
drop_last=True,
num_workers=0,
)
data_loader_dictionary[split] = data_loader
return data_loader_dictionary
@staticmethod
def calc_n_epochs(n_obs: int, batch_size: int, n_iters: int) -> int:
"""
Calculates the number of epochs required to reach the maximum number
of iterations specified in the model training parameters.
the motivation here is that `max_iters` is easier to optimize and keep stable,
across different n_obs - the number of data points.
"""
n_batches = math.ceil(n_obs // batch_size)
epochs = math.ceil(n_iters // n_batches)
if epochs <= 10:
logger.warning("User set `max_iters` in such a way that the trainer will only perform "
f" {epochs} epochs. Please consider increasing this value accordingly")
if epochs <= 1:
logger.warning("Epochs set to 1. Please review your `max_iters` value")
epochs = 1
return epochs
def save(self, path: Path):
"""
- Saving any nn.Module state_dict
- Saving model_meta_data, this dict should contain any additional data that the
user needs to store. e.g class_names for classification models.
"""
torch.save({
"model_state_dict": self.model.state_dict(),
"optimizer_state_dict": self.optimizer.state_dict(),
"model_meta_data": self.model_meta_data,
"pytrainer": self
}, path)
def load(self, path: Path):
checkpoint = torch.load(path)
return self.load_from_checkpoint(checkpoint)
def load_from_checkpoint(self, checkpoint: Dict):
"""
when using continual_learning, DataDrawer will load the dictionary
(containing state dicts and model_meta_data) by calling torch.load(path).
you can access this dict from any class that inherits IFreqaiModel by calling
get_init_model method.
"""
self.model.load_state_dict(checkpoint["model_state_dict"])
self.optimizer.load_state_dict(checkpoint["optimizer_state_dict"])
self.model_meta_data = checkpoint["model_meta_data"]
return self

View File

@ -0,0 +1,53 @@
from abc import ABC, abstractmethod
from pathlib import Path
from typing import Dict, List
import pandas as pd
import torch
from torch import nn
class PyTorchTrainerInterface(ABC):
@abstractmethod
def fit(self, data_dictionary: Dict[str, pd.DataFrame], splits: List[str]) -> None:
"""
:param data_dictionary: the dictionary constructed by DataHandler to hold
all the training and test data/labels.
:param splits: splits to use in training, splits must contain "train",
optional "test" could be added by setting freqai.data_split_parameters.test_size > 0
in the config file.
- Calculates the predicted output for the batch using the PyTorch model.
- Calculates the loss between the predicted and actual output using a loss function.
- Computes the gradients of the loss with respect to the model's parameters using
backpropagation.
- Updates the model's parameters using an optimizer.
"""
@abstractmethod
def save(self, path: Path) -> None:
"""
- Saving any nn.Module state_dict
- Saving model_meta_data, this dict should contain any additional data that the
user needs to store. e.g class_names for classification models.
"""
def load(self, path: Path) -> nn.Module:
"""
:param path: path to zip file.
:returns: pytorch model.
"""
checkpoint = torch.load(path)
return self.load_from_checkpoint(checkpoint)
@abstractmethod
def load_from_checkpoint(self, checkpoint: Dict) -> nn.Module:
"""
when using continual_learning, DataDrawer will load the dictionary
(containing state dicts and model_meta_data) by calling torch.load(path).
you can access this dict from any class that inherits IFreqaiModel by calling
get_init_model method.
:checkpoint checkpoint: dict containing the model & optimizer state dicts,
model_meta_data, etc..
"""

View File

View File

@ -223,6 +223,7 @@ class FreqaiExampleHybridStrategy(IStrategy):
:param metadata: metadata of current pair
usage example: dataframe["&-target"] = dataframe["close"].shift(-1) / dataframe["close"]
"""
self.freqai.class_names = ["down", "up"]
dataframe['&s-up_or_down'] = np.where(dataframe["close"].shift(-50) >
dataframe["close"], 'up', 'down')

View File

@ -85,7 +85,7 @@ function updateenv() {
if [[ $REPLY =~ ^[Yy]$ ]]
then
REQUIREMENTS_FREQAI="-r requirements-freqai.txt --use-pep517"
read -p "Do you also want dependencies for freqai-rl (~700mb additional space required) [y/N]? "
read -p "Do you also want dependencies for freqai-rl or PyTorch (~700mb additional space required) [y/N]? "
if [[ $REPLY =~ ^[Yy]$ ]]
then
REQUIREMENTS_FREQAI="-r requirements-freqai-rl.txt"

View File

@ -1,5 +1,6 @@
from copy import deepcopy
from pathlib import Path
from typing import Any, Dict
from unittest.mock import MagicMock
import pytest
@ -85,6 +86,22 @@ def make_rl_config(conf):
return conf
def mock_pytorch_mlp_model_training_parameters() -> Dict[str, Any]:
return {
"learning_rate": 3e-4,
"trainer_kwargs": {
"max_iters": 1,
"batch_size": 64,
"max_n_eval_batches": 1,
},
"model_kwargs": {
"hidden_dim": 32,
"dropout_percent": 0.2,
"n_layer": 1,
}
}
def get_patched_data_kitchen(mocker, freqaiconf):
dk = FreqaiDataKitchen(freqaiconf)
return dk

View File

@ -15,7 +15,8 @@ from freqtrade.optimize.backtesting import Backtesting
from freqtrade.persistence import Trade
from freqtrade.plugins.pairlistmanager import PairListManager
from tests.conftest import EXMS, create_mock_trades, get_patched_exchange, log_has_re
from tests.freqai.conftest import get_patched_freqai_strategy, make_rl_config
from tests.freqai.conftest import (get_patched_freqai_strategy, make_rl_config,
mock_pytorch_mlp_model_training_parameters)
def is_py11() -> bool:
@ -34,13 +35,14 @@ def is_mac() -> bool:
def can_run_model(model: str) -> None:
if (is_arm() or is_py11()) and "Catboost" in model:
pytest.skip("CatBoost is not supported on ARM")
pytest.skip("CatBoost is not supported on ARM.")
if is_mac() and not is_arm() and 'Reinforcement' in model:
pytest.skip("Reinforcement learning module not available on intel based Mac OS")
is_pytorch_model = 'Reinforcement' in model or 'PyTorch' in model
if is_pytorch_model and is_mac() and not is_arm():
pytest.skip("Reinforcement learning / PyTorch module not available on intel based Mac OS.")
if is_py11() and 'Reinforcement' in model:
pytest.skip("Reinforcement learning currently not available on python 3.11.")
if is_pytorch_model and is_py11():
pytest.skip("Reinforcement learning / PyTorch currently not available on python 3.11.")
@pytest.mark.parametrize('model, pca, dbscan, float32, can_short, shuffle, buffer', [
@ -48,11 +50,12 @@ def can_run_model(model: str) -> None:
('XGBoostRegressor', False, True, False, True, False, 10),
('XGBoostRFRegressor', False, False, False, True, False, 0),
('CatboostRegressor', False, False, False, True, True, 0),
('PyTorchMLPRegressor', False, False, False, True, False, 0),
('ReinforcementLearner', False, True, False, True, False, 0),
('ReinforcementLearner_multiproc', False, False, False, True, False, 0),
('ReinforcementLearner_test_3ac', False, False, False, False, False, 0),
('ReinforcementLearner_test_3ac', False, False, False, True, False, 0),
('ReinforcementLearner_test_4ac', False, False, False, True, False, 0)
('ReinforcementLearner_test_4ac', False, False, False, True, False, 0),
])
def test_extract_data_and_train_model_Standard(mocker, freqai_conf, model, pca,
dbscan, float32, can_short, shuffle, buffer):
@ -79,6 +82,11 @@ def test_extract_data_and_train_model_Standard(mocker, freqai_conf, model, pca,
freqai_conf["freqaimodel_path"] = str(Path(__file__).parents[1] / "freqai" / "test_models")
freqai_conf["freqai"]["rl_config"]["drop_ohlc_from_features"] = True
if 'PyTorchMLPRegressor' in model:
model_save_ext = 'zip'
pytorch_mlp_mtp = mock_pytorch_mlp_model_training_parameters()
freqai_conf['freqai']['model_training_parameters'].update(pytorch_mlp_mtp)
strategy = get_patched_freqai_strategy(mocker, freqai_conf)
exchange = get_patched_exchange(mocker, freqai_conf)
strategy.dp = DataProvider(freqai_conf, exchange)
@ -123,8 +131,7 @@ def test_extract_data_and_train_model_Standard(mocker, freqai_conf, model, pca,
('CatboostClassifierMultiTarget', "freqai_test_multimodel_classifier_strat")
])
def test_extract_data_and_train_model_MultiTargets(mocker, freqai_conf, model, strat):
if (is_arm() or is_py11()) and 'Catboost' in model:
pytest.skip("CatBoost is not supported on ARM")
can_run_model(model)
freqai_conf.update({"timerange": "20180110-20180130"})
freqai_conf.update({"strategy": strat})
@ -164,10 +171,10 @@ def test_extract_data_and_train_model_MultiTargets(mocker, freqai_conf, model, s
'CatboostClassifier',
'XGBoostClassifier',
'XGBoostRFClassifier',
'PyTorchMLPClassifier',
])
def test_extract_data_and_train_model_Classifiers(mocker, freqai_conf, model):
if (is_arm() or is_py11()) and model == 'CatboostClassifier':
pytest.skip("CatBoost is not supported on ARM")
can_run_model(model)
freqai_conf.update({"freqaimodel": model})
freqai_conf.update({"strategy": "freqai_test_classifier"})
@ -193,7 +200,20 @@ def test_extract_data_and_train_model_Classifiers(mocker, freqai_conf, model):
freqai.extract_data_and_train_model(new_timerange, "ADA/BTC",
strategy, freqai.dk, data_load_timerange)
assert Path(freqai.dk.data_path / f"{freqai.dk.model_filename}_model.joblib").exists()
if 'PyTorchMLPClassifier':
pytorch_mlp_mtp = mock_pytorch_mlp_model_training_parameters()
freqai_conf['freqai']['model_training_parameters'].update(pytorch_mlp_mtp)
if freqai.dd.model_type == 'joblib':
model_file_extension = ".joblib"
elif freqai.dd.model_type == "pytorch":
model_file_extension = ".zip"
else:
raise Exception(f"Unsupported model type: {freqai.dd.model_type},"
f" can't assign model_file_extension")
assert Path(freqai.dk.data_path /
f"{freqai.dk.model_filename}_model{model_file_extension}").exists()
assert Path(freqai.dk.data_path / f"{freqai.dk.model_filename}_metadata.json").exists()
assert Path(freqai.dk.data_path / f"{freqai.dk.model_filename}_trained_df.pkl").exists()
assert Path(freqai.dk.data_path / f"{freqai.dk.model_filename}_svm_model.joblib").exists()
@ -207,10 +227,12 @@ def test_extract_data_and_train_model_Classifiers(mocker, freqai_conf, model):
("LightGBMRegressor", 2, "freqai_test_strat"),
("XGBoostRegressor", 2, "freqai_test_strat"),
("CatboostRegressor", 2, "freqai_test_strat"),
("PyTorchMLPRegressor", 2, "freqai_test_strat"),
("ReinforcementLearner", 3, "freqai_rl_test_strat"),
("XGBoostClassifier", 2, "freqai_test_classifier"),
("LightGBMClassifier", 2, "freqai_test_classifier"),
("CatboostClassifier", 2, "freqai_test_classifier")
("CatboostClassifier", 2, "freqai_test_classifier"),
("PyTorchMLPClassifier", 2, "freqai_test_classifier")
],
)
def test_start_backtesting(mocker, freqai_conf, model, num_files, strat, caplog):
@ -231,6 +253,10 @@ def test_start_backtesting(mocker, freqai_conf, model, num_files, strat, caplog)
if 'test_4ac' in model:
freqai_conf["freqaimodel_path"] = str(Path(__file__).parents[1] / "freqai" / "test_models")
if 'PyTorchMLP' in model:
pytorch_mlp_mtp = mock_pytorch_mlp_model_training_parameters()
freqai_conf['freqai']['model_training_parameters'].update(pytorch_mlp_mtp)
freqai_conf.get("freqai", {}).get("feature_parameters", {}).update(
{"indicator_periods_candles": [2]})

View File

@ -82,7 +82,7 @@ class freqai_test_classifier(IStrategy):
return dataframe
def set_freqai_targets(self, dataframe: DataFrame, metadata: Dict, **kwargs):
self.freqai.class_names = ["down", "up"]
dataframe['&s-up_or_down'] = np.where(dataframe["close"].shift(-100) >
dataframe["close"], 'up', 'down')