Coverage for source/training/training_config.py

1# training/training_config.py

3from rl.policy import Policy, BoltzmannQPolicy

4from tensorflow.keras.optimizers import Optimizer, Adam

6from ..environment.trading_environment import TradingEnvironment

7from ..agent.agent_handler import AgentHandler

8from ..model.model_blue_prints.base_blue_print import BaseBluePrint

9from ..environment.reward_validator_base import RewardValidatorBase

class TrainingConfig():
    """
    Responsible for creating and configuring training environment and agent.

    This class encapsulates all configuration parameters needed for training
    and testing a trading agent, including environment setup, agent creation,
    and reward validation. It provides a centralized way to manage training
    parameters and instantiate required components.
    """

    def __init__(self, nr_of_steps: int, nr_of_episodes: int, model_blue_print: BaseBluePrint,
                 data_path: str, initial_budget: float, max_amount_of_trades: int, window_size: int,
                 validator: RewardValidatorBase, sell_stop_loss: float = 0.8, sell_take_profit: float = 1.2,
                 buy_stop_loss: float = 0.8, buy_take_profit: float = 1.2, penalty_starts: int = 0,
                 penalty_stops: int = 10, static_reward_adjustment: float = 1, policy: Policy = None,
                 optimizer: Optimizer = None, repeat_test: int = 10) -> None:
        """
        Initializes the training configuration with provided parameters.

        Parameters:
            nr_of_steps (int): Total number of training steps.
            nr_of_episodes (int): Number of training episodes.
            model_blue_print (BaseBluePrint): Blueprint for creating the neural network model.
            data_path (str): Path to the training data file.
            initial_budget (float): Starting budget for the agent.
            max_amount_of_trades (int): Maximum number of trades allowed to be placed in the environment.
            window_size (int): Size of the observation window for market data.
            validator (RewardValidatorBase): Strategy for validating and calculating rewards.
            sell_stop_loss (float): Coefficient defining when to stop loss on sell positions.
            sell_take_profit (float): Coefficient defining when to take profit on sell positions.
            buy_stop_loss (float): Coefficient defining when to stop loss on buy positions.
            buy_take_profit (float): Coefficient defining when to take profit on buy positions.
            penalty_starts (int): Starting point (in trading periods without activity) that penalty should be applied from.
            penalty_stops (int): Ending point (in trading periods without activity) that penalty growth should be stopped at.
            static_reward_adjustment (float): Adjustment factor for rewards, used to penalize unwanted actions.
            policy (Policy): Policy for action selection during training. When omitted (None),
                a new BoltzmannQPolicy() is created for this configuration.
            optimizer (Optimizer): Optimizer to be used for model compilation and training. When
                omitted (None), a new Adam(learning_rate=1e-3) is created for this configuration.
            repeat_test (int): Number of times to repeat testing for evaluation.
        """

        # Default argument values are evaluated only once, when the function is
        # defined. Using policy/optimizer instances as defaults would therefore
        # hand the very same objects to every TrainingConfig that relies on the
        # defaults (and would build them at import time). Create fresh ones here.
        if policy is None:
            policy = BoltzmannQPolicy()
        if optimizer is None:
            optimizer = Adam(learning_rate=1e-3)

        # Training config
        self.nr_of_steps = nr_of_steps
        self.nr_of_episodes = nr_of_episodes
        self.repeat_test = repeat_test

        # Environment config
        self.__data_path: str = data_path
        self.__initial_budget: float = initial_budget
        self.__max_amount_of_trades: int = max_amount_of_trades
        self.__window_size: int = window_size
        self.__sell_stop_loss: float = sell_stop_loss
        self.__sell_take_profit: float = sell_take_profit
        self.__buy_stop_loss: float = buy_stop_loss
        self.__buy_take_profit: float = buy_take_profit
        self.__penalty_starts: int = penalty_starts
        self.__penalty_stops: int = penalty_stops
        self.__static_reward_adjustment: float = static_reward_adjustment
        self.__validator: RewardValidatorBase = validator
        # Set by instantiate_environment(); None until then.
        self.__instantiated_environment: TradingEnvironment = None

        # Agent config
        self.__model_blue_print: BaseBluePrint = model_blue_print
        self.__policy: Policy = policy
        self.__optimizer: Optimizer = optimizer
        # Set by instantiate_agent(); None until then.
        self.__instantiated_agent: AgentHandler = None

    def __optimizer_settings(self):
        """
        Returns a printable description of the optimizer's settings.

        Prefers the private `_hyper` attribute when the optimizer has one, so
        the output stays the same for optimizers that expose it. Falls back to
        the public `get_config()` and finally to `repr()` so that building the
        string representation never fails on an optimizer without `_hyper`.

        Returns:
            object: Value to be interpolated into the string representation.
        """

        legacy_hyper = getattr(self.__optimizer, "_hyper", None)
        if legacy_hyper is not None:
            return legacy_hyper

        get_config = getattr(self.__optimizer, "get_config", None)
        if callable(get_config):
            return get_config()

        return repr(self.__optimizer)

    def __str__(self) -> str:
        """
        Returns a string representation of the configuration.

        Creates a formatted multi-line string containing the configuration
        parameters and their values for easy logging. The validator, model
        blueprint and policy are shown as their class name followed by their
        instance attributes (vars()).

        Returns:
            str: Formatted string representation of the configuration.
        """

        return (
            f"Training config:\n"
            f"\tnr_of_steps: {self.nr_of_steps}\n"
            f"\tnr_of_episodes: {self.nr_of_episodes}\n"
            f"\trepeat_test: {self.repeat_test}\n"
            f"\tinitial_budget: {self.__initial_budget}\n"
            f"\tmax_amount_of_trades: {self.__max_amount_of_trades}\n"
            f"\twindow_size: {self.__window_size}\n"
            f"\tsell_stop_loss: {self.__sell_stop_loss}\n"
            f"\tsell_take_profit: {self.__sell_take_profit}\n"
            f"\tbuy_stop_loss: {self.__buy_stop_loss}\n"
            f"\tbuy_take_profit: {self.__buy_take_profit}\n"
            f"\tpenalty_starts: {self.__penalty_starts}\n"
            f"\tpenalty_stops: {self.__penalty_stops}\n"
            f"\tstatic_reward_adjustment: {self.__static_reward_adjustment}\n"
            f"\tvalidator: {self.__validator.__class__.__name__}\n"
            f"\t\t{vars(self.__validator)}\n"
            f"\tmodel_blue_print: {self.__model_blue_print.__class__.__name__}\n"
            f"\t\t{vars(self.__model_blue_print)}\n"
            f"\tpolicy: {self.__policy.__class__.__name__}\n"
            f"\t\t{vars(self.__policy)}\n"
            f"\toptimizer: {self.__optimizer.__class__.__name__}\n"
            f"\t\t{self.__optimizer_settings()}\n"
        )

    def instantiate_environment(self) -> TradingEnvironment:
        """
        Creates and returns a TradingEnvironment based on the configuration.

        Instantiates a new trading environment with the parameters specified
        in this config. Stores the created environment internally for later use
        when creating the agent. Each call builds a new environment and
        replaces the previously stored one.

        Returns:
            TradingEnvironment: Configured trading environment ready for training.
        """

        # Arguments are passed positionally; the order must match the
        # TradingEnvironment constructor.
        self.__instantiated_environment = TradingEnvironment(self.__data_path,
                                                             self.__initial_budget,
                                                             self.__max_amount_of_trades,
                                                             self.__window_size,
                                                             self.__validator,
                                                             self.__sell_stop_loss,
                                                             self.__sell_take_profit,
                                                             self.__buy_stop_loss,
                                                             self.__buy_take_profit,
                                                             self.__penalty_starts,
                                                             self.__penalty_stops,
                                                             self.__static_reward_adjustment)

        return self.__instantiated_environment

    def instantiate_agent(self) -> AgentHandler:
        """
        Creates and returns an AgentHandler based on the configuration.

        Uses the model blueprint to create a neural network model with the correct
        input and output dimensions based on the environment's observation and action
        spaces. Then wraps this model in an AgentHandler with the specified policy
        and optimizer.

        Returns:
            AgentHandler: Configured agent handler ready for training.

        Raises:
            RuntimeError: If environment has not been instantiated first.
        """

        # The model dimensions are read from the environment, so it must exist first.
        if self.__instantiated_environment is None:
            raise RuntimeError("Environment not instantiated yet!")

        observation_space_shape = self.__instantiated_environment.observation_space.shape
        nr_of_actions = self.__instantiated_environment.action_space.n
        spatial_data_shape = self.__instantiated_environment.get_environment_spatial_data_dimension()
        model = self.__model_blue_print.instantiate_model(observation_space_shape, nr_of_actions, spatial_data_shape)
        self.__instantiated_agent = AgentHandler(model, self.__policy, nr_of_actions, self.__optimizer)

        return self.__instantiated_agent

Coverage for source/training/training_config.py: 86%

42 statements