bugfix skip test split when empty
commit 026b6a39a9
parent 8903ba5d89
@@ -97,7 +97,7 @@ class BasePyTorchClassifier(BasePyTorchModel):
         """

         target_column_name = dk.label_list[0]
-        for split in ["train", "test"]:
+        for split in self.splits:
             label_df = data_dictionary[f"{split}_labels"]
             self.assert_valid_class_names(label_df[target_column_name], class_names)
             label_df[target_column_name] = list(
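For context, a minimal sketch of the situation this hunk guards against, assuming (per the commit message) that the "test" label frame is empty when the test split is disabled; the DataFrames and the "target" column are illustrative stand-ins, not freqtrade's real data:

import pandas as pd

data_dictionary = {
    "train_labels": pd.DataFrame({"target": ["up", "down", "up"]}),
    "test_labels": pd.DataFrame({"target": []}),  # empty when test_size == 0
}
splits = ["train"]  # what self.splits holds when the test split is disabled
for split in splits:
    label_df = data_dictionary[f"{split}_labels"]
    # class-name encoding now runs only on splits that actually contain rows
    label_df["target"] = [1 if v == "up" else 0 for v in label_df["target"]]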
@@ -22,6 +22,8 @@ class BasePyTorchModel(IFreqaiModel):
         super().__init__(config=kwargs["config"])
         self.dd.model_type = "pytorch"
         self.device = "cuda" if torch.cuda.is_available() else "cpu"
+        test_size = self.freqai_info.get('data_split_parameters', {}).get('test_size')
+        self.splits = ["train", "test"] if test_size != 0 else ["train"]

     def train(
         self, unfiltered_df: DataFrame, pair: str, dk: FreqaiDataKitchen, **kwargs
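The two added lines reduce to the following behavior (a minimal sketch; the config dict stands in for self.freqai_info, which mirrors the user config). Note that a missing test_size also keeps "test" enabled, since None != 0 is true:

freqai_info = {"data_split_parameters": {"test_size": 0}}  # illustrative config

test_size = freqai_info.get("data_split_parameters", {}).get("test_size")
splits = ["train", "test"] if test_size != 0 else ["train"]
print(splits)  # ['train'] -- downstream code now skips the empty test split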
@@ -76,5 +76,5 @@ class PyTorchMLPClassifier(BasePyTorchClassifier):
             squeeze_target_tensor=True,
             **self.trainer_kwargs,
         )
-        trainer.fit(data_dictionary)
+        trainer.fit(data_dictionary, self.splits)
         return trainer
@@ -72,5 +72,5 @@ class PyTorchMLPRegressor(BasePyTorchRegressor):
             target_tensor_type=torch.float,
             **self.trainer_kwargs,
         )
-        trainer.fit(data_dictionary)
+        trainer.fit(data_dictionary, self.splits)
         return trainer
@@ -1,7 +1,7 @@
 import logging
 import math
 from pathlib import Path
-from typing import Any, Dict, Optional
+from typing import Any, Dict, List, Optional

 import pandas as pd
 import torch
@@ -43,7 +43,6 @@ class PyTorchModelTrainer:
             self.optimizer.step(). used to calculate n_epochs.
         :param batch_size: The size of the batches to use during training.
         :param max_n_eval_batches: The maximum number batches to use for evaluation.
-
         """
         self.model = model
         self.optimizer = optimizer
@@ -58,21 +57,27 @@ class PyTorchModelTrainer:
         if init_model:
             self.load_from_checkpoint(init_model)

-    def fit(self, data_dictionary: Dict[str, pd.DataFrame]):
+    def fit(self, data_dictionary: Dict[str, pd.DataFrame], splits: List[str]):
         """
+        :param data_dictionary: the dictionary constructed by DataHandler to hold
+        all the training and test data/labels.
+        :param splits: splits to use in training; must contain "train". An optional
+        "test" split can be added by setting freqai.data_split_parameters.test_size > 0
+        in the config file.
+
         - Calculates the predicted output for the batch using the PyTorch model.
         - Calculates the loss between the predicted and actual output using a loss function.
         - Computes the gradients of the loss with respect to the model's parameters using
           backpropagation.
         - Updates the model's parameters using an optimizer.
         """
-        data_loaders_dictionary = self.create_data_loaders_dictionary(data_dictionary)
+        data_loaders_dictionary = self.create_data_loaders_dictionary(data_dictionary, splits)
         epochs = self.calc_n_epochs(
             n_obs=len(data_dictionary["train_features"]),
             batch_size=self.batch_size,
             n_iters=self.max_iters
         )
-        for epoch in range(epochs):
+        for epoch in range(1, epochs + 1):
             # training
             losses = []
             for i, batch_data in enumerate(data_loaders_dictionary["train"]):
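A side fix in this hunk is the 1-based epoch counter; the effect is purely cosmetic for the log line built in the next hunk:

epochs = 3
for epoch in range(1, epochs + 1):
    # prints "epoch 1/3" .. "epoch 3/3" instead of "epoch 0/3" .. "epoch 2/3"
    print(f"epoch {epoch}/{epochs}")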
@@ -87,13 +92,18 @@ class PyTorchModelTrainer:
                 self.optimizer.step()
                 losses.append(loss.item())
             train_loss = sum(losses) / len(losses)
+            log_message = f"epoch {epoch}/{epochs}: train loss {train_loss:.4f}"

             # evaluation
-            test_loss = self.estimate_loss(data_loaders_dictionary, self.max_n_eval_batches, "test")
-            logger.info(
-                f"epoch {epoch}/{epochs}:"
-                f" train loss {train_loss:.4f} ; test loss {test_loss:.4f}"
-            )
+            if "test" in splits:
+                test_loss = self.estimate_loss(
+                    data_loaders_dictionary,
+                    self.max_n_eval_batches,
+                    "test"
+                )
+                log_message += f" ; test loss {test_loss:.4f}"
+
+            logger.info(log_message)

     @torch.no_grad()
     def estimate_loss(
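The rewritten logging follows a build-then-append pattern: the train loss is always reported, and the test loss only when a "test" split exists. A standalone sketch with placeholder loss values:

splits = ["train"]  # or ["train", "test"]
train_loss, test_loss = 0.1234, 0.2345

log_message = f"epoch 1/10: train loss {train_loss:.4f}"
if "test" in splits:
    log_message += f" ; test loss {test_loss:.4f}"
print(log_message)  # -> epoch 1/10: train loss 0.1234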
@@ -122,13 +132,14 @@ class PyTorchModelTrainer:

     def create_data_loaders_dictionary(
             self,
-            data_dictionary: Dict[str, pd.DataFrame]
+            data_dictionary: Dict[str, pd.DataFrame],
+            splits: List[str]
     ) -> Dict[str, DataLoader]:
         """
         Converts the input data to PyTorch tensors using a data loader.
         """
         data_loader_dictionary = {}
-        for split in ["train", "test"]:
+        for split in splits:
             x = torch.from_numpy(data_dictionary[f"{split}_features"].values).float()
             y = torch.from_numpy(data_dictionary[f"{split}_labels"].values)\
                 .to(self.target_tensor_type)
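Taken together, the new contract of create_data_loaders_dictionary is that loaders are built only for the requested splits, so no empty "test" tensors are ever materialized. A self-contained sketch under assumed shapes and a float target dtype (the helper name build_loaders is hypothetical):

import numpy as np
import pandas as pd
import torch
from torch.utils.data import DataLoader, TensorDataset

def build_loaders(data_dictionary, splits, batch_size=2):
    loaders = {}
    for split in splits:  # only the splits the caller asked for
        x = torch.from_numpy(data_dictionary[f"{split}_features"].values).float()
        y = torch.from_numpy(data_dictionary[f"{split}_labels"].values).float()
        loaders[split] = DataLoader(TensorDataset(x, y), batch_size=batch_size)
    return loaders

data = {
    "train_features": pd.DataFrame(np.random.rand(4, 3)),
    "train_labels": pd.DataFrame(np.random.rand(4, 1)),
}
print(build_loaders(data, splits=["train"]).keys())  # dict_keys(['train'])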