Coverage for source/environment/trading_environment.py: 23%
155 statements

# environment/trading_environment.py

# global imports
from gym import Env
from gym.spaces import Discrete, Box
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
import math
import random
from types import SimpleNamespace
from typing import Optional
import copy
from tensorflow.keras.utils import to_categorical

# local imports
from source.environment import Broker
from source.environment import RewardValidatorBase
from source.environment import LabelAnnotatorBase

class TradingEnvironment(Env):
    """
    Implements a stock market environment in which an actor can perform actions (place orders).
    It is used to train neural network models with a reinforcement learning approach. It can be
    configured to award points and impose penalties in several ways.
    """

    TRAIN_MODE = 'train'
    TEST_MODE = 'test'

    def __init__(self, data_path: str, initial_budget: float, max_amount_of_trades: int, window_size: int,
                 validator: RewardValidatorBase, label_annotator: LabelAnnotatorBase, sell_stop_loss: float,
                 sell_take_profit: float, buy_stop_loss: float, buy_take_profit: float, test_ratio: float = 0.2,
                 penalty_starts: int = 0, penalty_stops: int = 10, static_reward_adjustment: float = 1) -> None:
        """
        Class constructor. Allows to define all crucial constants, reward validation methods,
        environmental penalty policies, etc.

        Parameters:
            data_path (str): Path to CSV data that should be used as the environmental stock market.
            initial_budget (float): Initial budget constant for the trader to start from.
            max_amount_of_trades (int): Max amount of trades that can be ongoing at the same time.
                Setting this constant prevents traders from placing orders randomly and defines
                the amount of money that can be assigned to a single order at a certain iteration.
            window_size (int): Constant defining how far into the past the trader will be able to look
                at a certain iteration.
            validator (RewardValidatorBase): Validator implementing the policy used to award points
                for closed trades.
            label_annotator (LabelAnnotatorBase): Annotator implementing the policy used to label
                data with target values. It is used to provide supervised agents with information
                about what the target class value is for a certain iteration.
            sell_stop_loss (float): Constant used to define the losing boundary at which a sell order
                (short) is closed.
            sell_take_profit (float): Constant used to define the winning boundary at which a sell order
                (short) is closed.
            buy_stop_loss (float): Constant used to define the losing boundary at which a buy order
                (long) is closed.
            buy_take_profit (float): Constant used to define the winning boundary at which a buy order
                (long) is closed.
            test_ratio (float): Ratio of data that should be used for testing purposes.
            penalty_starts (int): Constant defining how many trading periods the trader can go without
                placing an order before a penalty is imposed. In the range between the start and stop
                constants the penalty is calculated as a fraction of the positive reward and subtracted
                from the actual reward.
            penalty_stops (int): Constant defining at which trading period the penalty will no longer be
                increased. The reward for trading periods exceeding the penalty stop constant equals
                minus the static reward adjustment.
            static_reward_adjustment (float): Constant used to penalize the trader for bad choices or
                reward it for good ones.
        """

        if test_ratio < 0.0 or test_ratio >= 1.0:
            raise ValueError(f"Invalid test_ratio: {test_ratio}. It should be in range [0, 1).")

        self.__data: dict[str, pd.DataFrame] = self.__load_data(data_path, test_ratio)
        self.__mode = TradingEnvironment.TRAIN_MODE
        self.__broker: Broker = Broker()
        self.__validator: RewardValidatorBase = validator
        self.__label_annotator: LabelAnnotatorBase = label_annotator

        self.__trading_data: SimpleNamespace = SimpleNamespace()
        self.__trading_data.current_budget: float = initial_budget
        self.__trading_data.currently_invested: float = 0
        self.__trading_data.no_trades_placed_for: int = 0
        self.__trading_data.currently_placed_trades: int = 0

        self.__trading_consts = SimpleNamespace()
        self.__trading_consts.INITIAL_BUDGET: float = initial_budget
        self.__trading_consts.MAX_AMOUNT_OF_TRADES: int = max_amount_of_trades
        self.__trading_consts.WINDOW_SIZE: int = window_size
        self.__trading_consts.SELL_STOP_LOSS: float = sell_stop_loss
        self.__trading_consts.SELL_TAKE_PROFIT: float = sell_take_profit
        self.__trading_consts.BUY_STOP_LOSS: float = buy_stop_loss
        self.__trading_consts.BUY_TAKE_PROFIT: float = buy_take_profit
        self.__trading_consts.STATIC_REWARD_ADJUSTMENT: float = static_reward_adjustment
        self.__trading_consts.PENALTY_STARTS: int = penalty_starts
        self.__trading_consts.PENALTY_STOPS: int = penalty_stops
        self.__trading_consts.PROFITABILITY_FUNCTION = lambda x: -1.0 * math.exp(-x + 1) + 1
        self.__trading_consts.PENALTY_FUNCTION = lambda x: \
            min(1, 1 - math.tanh(-3.0 * (x - penalty_stops) / (penalty_stops - penalty_starts)))
        self.__trading_consts.OUTPUT_CLASSES: dict = vars(self.__label_annotator.get_output_classes())

        self.current_iteration: int = self.__trading_consts.WINDOW_SIZE
        self.state: list[float] = self.__prepare_state_data()
        self.action_space: Discrete = Discrete(3)
        self.observation_space: Box = Box(low = np.ones(len(self.state)) * -3,
                                          high = np.ones(len(self.state)) * 3,
                                          dtype = np.float64)
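
    # With the default penalty_starts = 0 and penalty_stops = 10, PENALTY_FUNCTION ramps from
    # roughly 0 for a freshly placed order (about 0.005 at x = 0 and 0.095 at x = 5) up to 1 once
    # the no-trade streak reaches penalty_stops, and stays clipped at 1 beyond that point.
    # PROFITABILITY_FUNCTION maps the normalized budget so that a budget equal to the initial one
    # yields 0, larger budgets approach 1, and smaller budgets turn negative.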

    def __load_data(self, data_path: str, test_size: float) -> dict[str, pd.DataFrame]:
        """
        Loads data from a CSV file and splits it into training and testing sets based on the
        specified test size ratio.

        Parameters:
            data_path (str): Path to the CSV file containing the stock market data.
            test_size (float): Ratio of the data to be used for testing.

        Returns:
            (dict[str, pd.DataFrame]): Dictionary containing training and testing data frames.
        """

        data_frame = pd.read_csv(data_path)
        dividing_index = int(len(data_frame) * (1 - test_size))

        return {
            TradingEnvironment.TRAIN_MODE: data_frame.iloc[:dividing_index].reset_index(drop=True),
            TradingEnvironment.TEST_MODE: data_frame.iloc[dividing_index:].reset_index(drop=True)
        }

    def __prepare_labeled_data(self) -> tuple[pd.DataFrame, pd.Series]:
        """
        Builds a supervised dataset from the environment data: each row contains the windowed
        state features for one iteration and the labels come from the label annotator.

        Returns:
            (tuple[pd.DataFrame, pd.Series]): Data frame with state features and series with target labels.
        """

        new_rows = []
        for i in range(self.current_iteration, self.get_environment_length()):
            data_row = self.__prepare_state_data(slice(i - self.__trading_consts.WINDOW_SIZE, i), include_trading_data = False)
            new_rows.append(data_row)

        new_data = pd.DataFrame(new_rows, columns=[f"feature_{i}" for i in range(len(new_rows[0]))])
        labels = self.__label_annotator.annotate(self.__data[self.__mode]).shift(-self.current_iteration)

        return new_data, labels.dropna()

    def __prepare_state_data(self, index: Optional[slice] = None, include_trading_data: bool = True) -> list[float]:
        """
        Calculates state data as a list of floats representing the current iteration's observation.
        The observation contains all input data refined to the window size and a couple of coefficients
        giving an insight into the current budget and orders situation.

        Parameters:
            index (Optional[slice]): Slice of data rows to build the observation from. Defaults to
                the window ending at the current iteration.
            include_trading_data (bool): Whether to append the budget and order coefficients.

        Returns:
            (list[float]): List with current observations for the environment.
        """

        if index is None:
            index = slice(self.current_iteration - self.__trading_consts.WINDOW_SIZE, self.current_iteration)

        current_market_data = self.__data[self.__mode].iloc[index]
        current_market_data_no_index = current_market_data.select_dtypes(include = [np.number])
        normalized_current_market_data_values = pd.DataFrame(StandardScaler().fit_transform(current_market_data_no_index),
                                                             columns = current_market_data_no_index.columns).values
        current_marked_data_list = normalized_current_market_data_values.ravel().tolist()

        if include_trading_data:
            current_normalized_budget = 1.0 * self.__trading_data.current_budget / self.__trading_consts.INITIAL_BUDGET
            current_profitability_coeff = self.__trading_consts.PROFITABILITY_FUNCTION(current_normalized_budget)
            current_trades_occupancy_coeff = 1.0 * self.__trading_data.currently_placed_trades / self.__trading_consts.MAX_AMOUNT_OF_TRADES
            current_no_trades_penalty_coeff = self.__trading_consts.PENALTY_FUNCTION(self.__trading_data.no_trades_placed_for)
            current_inner_state_list = [current_profitability_coeff, current_trades_occupancy_coeff, current_no_trades_penalty_coeff]
            current_marked_data_list += current_inner_state_list

        return current_marked_data_list
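
    # The resulting observation is a flat vector: WINDOW_SIZE rows of the numeric market columns,
    # z-scored per window by StandardScaler, optionally followed by three coefficients in this
    # order: profitability, trade-slot occupancy and the no-trade penalty.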

    def set_mode(self, mode: str) -> None:
        """
        Sets the mode of the environment to either TRAIN_MODE or TEST_MODE.

        Parameters:
            mode (str): Mode to set for the environment.

        Raises:
            ValueError: If the provided mode is not valid.
        """

        if mode not in [TradingEnvironment.TRAIN_MODE, TradingEnvironment.TEST_MODE]:
            raise ValueError(f"Invalid mode: {mode}. Use TradingEnvironment.TRAIN_MODE or TradingEnvironment.TEST_MODE.")
        self.__mode = mode

    def get_mode(self) -> str:
        """
        Mode getter.

        Returns:
            (str): Current mode of the environment.
        """

        return copy.copy(self.__mode)

    def get_trading_data(self) -> SimpleNamespace:
        """
        Trading data getter.

        Returns:
            (SimpleNamespace): Copy of the namespace with all trading data.
        """

        return copy.copy(self.__trading_data)

    def get_trading_consts(self) -> SimpleNamespace:
        """
        Trading constants getter.

        Returns:
            (SimpleNamespace): Copy of the namespace with all trading constants.
        """

        return copy.copy(self.__trading_consts)

    def get_broker(self) -> Broker:
        """
        Broker getter.

        Returns:
            (Broker): Copy of the broker used by the environment.
        """

        return copy.copy(self.__broker)

    def get_environment_length(self) -> int:
        """
        Environment length getter.

        Returns:
            (int): Length of the environment.
        """

        return len(self.__data[self.__mode])

    def get_environment_spatial_data_dimension(self) -> tuple[int, int]:
        """
        Environment spatial data dimensionality getter.

        Returns:
            (tuple[int, int]): Dimensions of spatial data in the environment.
        """

        return (self.__trading_consts.WINDOW_SIZE, self.__data[self.__mode].shape[1] - 1)

    def get_labeled_data(self) -> tuple[np.ndarray, np.ndarray]:
        """
        Labeled data getter. Prepares the supervised dataset and converts labels to one-hot vectors.

        Returns:
            (tuple[np.ndarray, np.ndarray]): Copy of the input features and one-hot encoded target labels.
        """

        input_data, output_data = self.__prepare_labeled_data()
        input_data = np.expand_dims(np.array(input_data), axis = 1)
        output_data = to_categorical(np.array(output_data),
                                     num_classes = len(self.__trading_consts.OUTPUT_CLASSES))
        return copy.copy((input_data, output_data))
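
    # The labeled inputs come back with shape (samples, 1, features) after np.expand_dims,
    # presumably so they can be fed to sequence-style model inputs directly, and the labels are
    # one-hot vectors with one column per class reported by the label annotator.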

    def get_data_for_iteration(self, columns: list[str], start: int, stop: int, step: int = 1) -> list[float]:
        """
        Data for certain iterations getter.

        Parameters:
            columns (list[str]): List of column names to extract from data.
            start (int): Start iteration index.
            stop (int): Stop iteration index.
            step (int): Step between iterations. Default is 1.

        Returns:
            (list[float]): Copy of part of the data with specified columns
                over specified iterations.
        """

        return copy.copy(self.__data[self.__mode].loc[start:stop:step, columns].values.ravel().tolist())

    def step(self, action: int) -> tuple[list[float], float, bool, dict]:
        """
        Performs the specified action on the environment. It results in the generation of new
        observations. This function causes trades to be handled, the reward to be calculated and
        the environment to be updated.

        Parameters:
            action (int): Number specifying the action. Possible values are 0 for the buy action,
                1 for the wait action and 2 for the sell action.

        Returns:
            (tuple[list[float], float, bool, dict]): Tuple containing the next observation
                state, reward, finish indication and additional info dictionary.
        """

        self.current_iteration += 1
        self.state = self.__prepare_state_data()

        close_changes = self.__data[self.__mode].iloc[self.current_iteration - 2 : self.current_iteration]['close'].values
        stock_change_coeff = 1 + (close_changes[1] - close_changes[0]) / close_changes[0]
        closed_orders = self.__broker.update_orders(stock_change_coeff)

        reward = self.__validator.validate_orders(closed_orders)
        self.__trading_data.currently_placed_trades -= len(closed_orders)
        self.__trading_data.current_budget += np.sum([trade.current_value for trade in closed_orders])
        self.__trading_data.currently_invested -= np.sum([trade.initial_value for trade in closed_orders])

        number_of_possible_trades = self.__trading_consts.MAX_AMOUNT_OF_TRADES - self.__trading_data.currently_placed_trades
        money_to_trade = 0
        if number_of_possible_trades > 0:
            money_to_trade = 1.0 / number_of_possible_trades * self.__trading_data.current_budget

        if action == 0:
            is_buy_order = True
            stop_loss = self.__trading_consts.SELL_STOP_LOSS
            take_profit = self.__trading_consts.SELL_TAKE_PROFIT
        elif action == 2:
            is_buy_order = False
            stop_loss = self.__trading_consts.BUY_STOP_LOSS
            take_profit = self.__trading_consts.BUY_TAKE_PROFIT

        if action != 1:
            if number_of_possible_trades > 0:
                self.__trading_data.current_budget -= money_to_trade
                self.__trading_data.currently_invested += money_to_trade
                self.__broker.place_order(money_to_trade, is_buy_order, stop_loss, take_profit)
                self.__trading_data.currently_placed_trades += 1
                self.__trading_data.no_trades_placed_for = 0
                reward += self.__trading_consts.STATIC_REWARD_ADJUSTMENT
            else:
                self.__trading_data.no_trades_placed_for += 1
                reward -= self.__trading_consts.STATIC_REWARD_ADJUSTMENT
        else:
            self.__trading_data.no_trades_placed_for += 1
            if number_of_possible_trades == 0:
                reward += self.__trading_consts.STATIC_REWARD_ADJUSTMENT

        if number_of_possible_trades > 0:
            reward *= (1 - self.__trading_consts.PENALTY_FUNCTION(self.__trading_data.no_trades_placed_for)) \
                if reward > 0 else 1
            if self.__trading_consts.PENALTY_STOPS < self.__trading_data.no_trades_placed_for:
                reward -= self.__trading_consts.STATIC_REWARD_ADJUSTMENT

        if (self.current_iteration >= self.get_environment_length() - 1 or
                self.__trading_data.current_budget > 10 * self.__trading_consts.INITIAL_BUDGET or
                (self.__trading_data.current_budget + self.__trading_data.currently_invested) / self.__trading_consts.INITIAL_BUDGET < 0.8):
            done = True
        else:
            done = False

        info = {'coeff': stock_change_coeff,
                'iteration': self.current_iteration,
                'number_of_closed_orders': len(closed_orders),
                'money_to_trade': money_to_trade,
                'action': action,
                'current_budget': self.__trading_data.current_budget,
                'currently_invested': self.__trading_data.currently_invested,
                'no_trades_placed_for': self.__trading_data.no_trades_placed_for,
                'currently_placed_trades': self.__trading_data.currently_placed_trades}

        return self.state, reward, done, info
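
    # Reward shaping in step(): the base reward comes from the validator for orders closed this
    # period; successfully placing a new order adds STATIC_REWARD_ADJUSTMENT, trying to trade with
    # no free trade slots subtracts it, and waiting while every slot is occupied adds it. When at
    # least one slot was free this step, a positive reward is scaled down by PENALTY_FUNCTION of
    # the current no-trade streak, and streaks longer than PENALTY_STOPS cost an extra
    # STATIC_REWARD_ADJUSTMENT.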

    def render(self) -> None:
        """
        Renders environment visualization. Will be implemented later.
        """

        # TODO: Visualization to be implemented
        pass

    def reset(self, randkey: Optional[int] = None) -> list[float]:
        """
        Resets the environment. Typically used when the environment is finished,
        i.e. when there are no more steps to be taken within the environment
        or the finish conditions are fulfilled.

        Parameters:
            randkey (Optional[int]): Value indicating what iteration
                should be treated as the starting point after reset.

        Returns:
            (list[float]): Current iteration observation state.
        """

        if randkey is None:
            randkey = random.randint(self.__trading_consts.WINDOW_SIZE, self.get_environment_length() - 1)
        self.__trading_data.current_budget = self.__trading_consts.INITIAL_BUDGET
        self.__trading_data.currently_invested = 0
        self.__trading_data.no_trades_placed_for = 0
        self.__trading_data.currently_placed_trades = 0
        self.__broker.reset()
        self.current_iteration = randkey
        self.state = self.__prepare_state_data()

        return self.state
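

# A minimal usage sketch (hypothetical, not part of the original module): it assumes a CSV file
# with at least a numeric 'close' column at the made-up path 'data/stock.csv', and it uses plain
# duck-typed stand-ins for the validator and annotator that implement only the methods this
# environment actually calls (validate_orders, annotate, get_output_classes). Real implementations
# of RewardValidatorBase and LabelAnnotatorBase live elsewhere in source.environment.
if __name__ == "__main__":

    class StubValidator:
        def validate_orders(self, closed_orders):
            # Assumed reward policy: summed profit of the orders closed in this step.
            return float(np.sum([order.current_value - order.initial_value for order in closed_orders]))

    class StubAnnotator:
        def get_output_classes(self):
            # Assumed class layout: attributes of a namespace-like object (vars() is applied to it).
            return SimpleNamespace(DOWN = 0, HOLD = 1, UP = 2)

        def annotate(self, data_frame):
            # Assumed labeling rule: sign of the next close-to-close change.
            change = data_frame['close'].pct_change().shift(-1)
            return change.apply(lambda x: 2 if x > 0 else (0 if x < 0 else 1))

    env = TradingEnvironment(data_path = 'data/stock.csv',    # hypothetical CSV path
                             initial_budget = 10000.0,
                             max_amount_of_trades = 5,
                             window_size = 10,
                             validator = StubValidator(),
                             label_annotator = StubAnnotator(),
                             sell_stop_loss = 0.95, sell_take_profit = 1.05,
                             buy_stop_loss = 0.95, buy_take_profit = 1.05)

    state = env.reset()
    done = False
    while not done:
        action = env.action_space.sample()    # random agent: 0 = buy, 1 = wait, 2 = sell
        state, reward, done, info = env.step(action)
    print(info)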