Coverage for source/environment/trading_environment.py: 97% (119 statements)
# environment/trading_environment.py

from gym import Env
from gym.spaces import Discrete, Box
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
import math
import random
from types import SimpleNamespace
from typing import Optional
import copy

from .broker import Broker
from .reward_validator_base import RewardValidatorBase


class TradingEnvironment(Env):
    """
    Implements a stock market environment in which an actor can perform actions (place orders).
    It is used to train neural network models with a reinforcement learning approach. It can be
    configured to award points and impose penalties in several ways.
    """

    def __init__(self, data_path: str, initial_budget: float, max_amount_of_trades: int, window_size: int,
                 validator: RewardValidatorBase, sell_stop_loss: float, sell_take_profit: float,
                 buy_stop_loss: float, buy_take_profit: float, penalty_starts: int = 0, penalty_stops: int = 10,
                 static_reward_adjustment: float = 1) -> None:
        """
        Class constructor. Allows defining all crucial constants, reward validation methods,
        environmental penalty policies, etc.

        Parameters:
            data_path (str): Path to the CSV data that should be used as the environmental stock market.
            initial_budget (float): Initial budget for the trader to start from.
            max_amount_of_trades (int): Maximum number of trades that can be ongoing at the same time.
                Setting this constant prevents traders from placing orders randomly and defines
                the amount of money that can be assigned to a single order at a given iteration.
            window_size (int): Constant defining how far into the past the trader can look
                at a given iteration.
            validator (RewardValidatorBase): Validator implementing the policy used to award points
                for closed trades.
            sell_stop_loss (float): Constant defining the losing boundary at which a sell order
                (short) is closed.
            sell_take_profit (float): Constant defining the winning boundary at which a sell order
                (short) is closed.
            buy_stop_loss (float): Constant defining the losing boundary at which a buy order
                (long) is closed.
            buy_take_profit (float): Constant defining the winning boundary at which a buy order
                (long) is closed.
            penalty_starts (int): Constant defining how many trading periods the trader can go without
                placing an order before a penalty is imposed. Within the range between the start and
                stop constants, the penalty is calculated as a fraction of a positive reward and
                subtracted from the actual reward.
            penalty_stops (int): Constant defining the trading period at which the penalty stops growing.
                Rewards for trading periods exceeding this constant equal minus the static reward adjustment.
            static_reward_adjustment (float): Constant used to penalize the trader for bad choices or
                reward it for good ones.
        """

        self.__data: pd.DataFrame = pd.read_csv(data_path)
        self.__broker: Broker = Broker()
        self.__validator: RewardValidatorBase = validator

        self.__trading_data: SimpleNamespace = SimpleNamespace()
        self.__trading_data.current_budget: float = initial_budget
        self.__trading_data.currently_invested: float = 0
        self.__trading_data.no_trades_placed_for: int = 0
        self.__trading_data.currently_placed_trades: int = 0

        self.__trading_consts = SimpleNamespace()
        self.__trading_consts.INITIAL_BUDGET: float = initial_budget
        self.__trading_consts.MAX_AMOUNT_OF_TRADES: int = max_amount_of_trades
        self.__trading_consts.WINDOW_SIZE: int = window_size
        self.__trading_consts.SELL_STOP_LOSS: float = sell_stop_loss
        self.__trading_consts.SELL_TAKE_PROFIT: float = sell_take_profit
        self.__trading_consts.BUY_STOP_LOSS: float = buy_stop_loss
        self.__trading_consts.BUY_TAKE_PROFIT: float = buy_take_profit
        self.__trading_consts.STATIC_REWARD_ADJUSTMENT: float = static_reward_adjustment
        self.__trading_consts.PENALTY_STARTS: int = penalty_starts
        self.__trading_consts.PENALTY_STOPS: int = penalty_stops
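        # Maps the normalized budget to a bounded score: 0.0 when the budget equals the
        # initial budget (x = 1), approaching 1.0 as the budget grows, and steeply
        # negative as it shrinks (e.g. roughly -1.72 at x = 0).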
        self.__trading_consts.PROFITABILITY_FUNCTION = lambda x: -1.0 * math.exp(-x + 1) + 1
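        # Grows smoothly from nearly 0 at penalty_starts to 1.0 at penalty_stops and
        # saturates there; e.g. with the defaults (0, 10) it is about 0.005 at x = 0,
        # about 0.5 near x = 8, and exactly 1.0 for x >= 10.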
        self.__trading_consts.PENALTY_FUNCTION = lambda x: \
            min(1, 1 - math.tanh(-3.0 * (x - penalty_stops) / (penalty_stops - penalty_starts)))

        self.current_iteration: int = self.__trading_consts.WINDOW_SIZE
        self.state: list[float] = self.__prepare_state_data()
        self.action_space: Discrete = Discrete(3)
        self.observation_space: Box = Box(low = np.ones(len(self.state)) * -3,
                                          high = np.ones(len(self.state)) * 3,
                                          dtype=np.float64)

    def __prepare_state_data(self) -> list[float]:
        """
        Calculates state data as a list of floats representing the current iteration's observation.
        Each observation contains all input data trimmed to the window size, plus a couple of
        coefficients giving insight into the current budget and orders situation.

        Returns:
            (list[float]): List with current observations for the environment.
        """
        current_market_data = self.__data.iloc[self.current_iteration - self.__trading_consts.WINDOW_SIZE : self.current_iteration]
        current_market_data_no_index = current_market_data.select_dtypes(include = [np.number])
        normalized_current_market_data_values = pd.DataFrame(StandardScaler().fit_transform(current_market_data_no_index),
                                                             columns = current_market_data_no_index.columns).values
        current_market_data_list = normalized_current_market_data_values.ravel().tolist()

        current_normalized_budget = 1.0 * self.__trading_data.current_budget / self.__trading_consts.INITIAL_BUDGET
        current_profitability_coeff = self.__trading_consts.PROFITABILITY_FUNCTION(current_normalized_budget)
        current_trades_occupancy_coeff = 1.0 * self.__trading_data.currently_placed_trades / self.__trading_consts.MAX_AMOUNT_OF_TRADES
        current_no_trades_penalty_coeff = self.__trading_consts.PENALTY_FUNCTION(self.__trading_data.no_trades_placed_for)
        current_inner_state_list = [current_profitability_coeff, current_trades_occupancy_coeff, current_no_trades_penalty_coeff]

        return current_market_data_list + current_inner_state_list

    def get_trading_data(self) -> SimpleNamespace:
        """
        Trading data getter.

        Returns:
            (SimpleNamespace): Copy of the namespace with all trading data.
        """

        return copy.copy(self.__trading_data)

    def get_trading_consts(self) -> SimpleNamespace:
        """
        Trading constants getter.

        Returns:
            (SimpleNamespace): Copy of the namespace with all trading constants.
        """

        return copy.copy(self.__trading_consts)

    def get_broker(self) -> Broker:
        """
        Broker getter.

        Returns:
            (Broker): Copy of the broker used by the environment.
        """

        return copy.copy(self.__broker)

    def get_environment_length(self) -> int:
        """
        Environment length getter.

        Returns:
            (int): Length of the environment data.
        """

        return len(self.__data)

    def get_environment_spatial_data_dimension(self) -> tuple[int, int]:
        """
        Environment spatial data dimensionality getter.

        Returns:
            (tuple[int, int]): Dimension of spatial data in the environment.
        """
        return (self.__trading_consts.WINDOW_SIZE, self.__data.shape[1] - 1)

    def get_data_for_iteration(self, columns: list[str], start: int, stop: int, step: int = 1) -> list[float]:
        """
        Getter for data over a certain range of iterations.

        Parameters:
            columns (list[str]): Names of the columns to extract.
            start (int): First iteration to include.
            stop (int): Last iteration to include.
            step (int): Step between consecutive iterations.

        Returns:
            (list[float]): Copy of part of the data with the specified columns
                over the specified iterations.
        """

        return copy.copy(self.__data.loc[start:stop:step, columns].values.ravel().tolist())

    def step(self, action: int) -> tuple[list[float], float, bool, dict]:
        """
        Performs the specified action on the environment. This results in the generation
        of a new observation. This function causes trades to be handled, the reward to be
        calculated and the environment to be updated.

        Parameters:
            action (int): Number specifying the action. Possible values are 0 for the buy
                action, 1 for the wait action and 2 for the sell action.

        Returns:
            (tuple[list[float], float, bool, dict]): Tuple containing the next observation
                state, reward, finish indication and an additional info dictionary.
        """

        self.current_iteration += 1
        self.state = self.__prepare_state_data()
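
        # Relative price change between the two most recent closes,
        # i.e. close[t] / close[t - 1] expressed as a multiplier.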
        close_changes = self.__data.iloc[self.current_iteration - 2 : self.current_iteration]['close'].values
        stock_change_coeff = 1 + (close_changes[1] - close_changes[0]) / close_changes[0]
        closed_orders = self.__broker.update_orders(stock_change_coeff)

        reward = self.__validator.validate_orders(closed_orders)
        self.__trading_data.currently_placed_trades -= len(closed_orders)
        self.__trading_data.current_budget += np.sum([trade.current_value for trade in closed_orders])
        self.__trading_data.currently_invested -= np.sum([trade.initial_value for trade in closed_orders])

        number_of_possible_trades = self.__trading_consts.MAX_AMOUNT_OF_TRADES - self.__trading_data.currently_placed_trades
        money_to_trade = 0
        if number_of_possible_trades > 0:
            money_to_trade = 1.0 / number_of_possible_trades * self.__trading_data.current_budget
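            # The free budget is split evenly across the remaining trade slots, e.g. with
            # MAX_AMOUNT_OF_TRADES = 5 and 2 open trades, a new order gets 1/3 of the budget.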

        if action == 0:
            is_buy_order = True
            stop_loss = self.__trading_consts.BUY_STOP_LOSS
            take_profit = self.__trading_consts.BUY_TAKE_PROFIT
        elif action == 2:
            is_buy_order = False
            stop_loss = self.__trading_consts.SELL_STOP_LOSS
            take_profit = self.__trading_consts.SELL_TAKE_PROFIT

        if action != 1:
            if number_of_possible_trades > 0:
                self.__trading_data.current_budget -= money_to_trade
                self.__trading_data.currently_invested += money_to_trade
                self.__broker.place_order(money_to_trade, is_buy_order, stop_loss, take_profit)
                self.__trading_data.currently_placed_trades += 1
                self.__trading_data.no_trades_placed_for = 0
                reward += self.__trading_consts.STATIC_REWARD_ADJUSTMENT
            else:
                self.__trading_data.no_trades_placed_for += 1
                reward -= self.__trading_consts.STATIC_REWARD_ADJUSTMENT
        else:
            self.__trading_data.no_trades_placed_for += 1
            if number_of_possible_trades == 0:
                reward += self.__trading_consts.STATIC_REWARD_ADJUSTMENT
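
        # Reward shaping: positive rewards shrink as the no-trade streak approaches
        # PENALTY_STOPS, and beyond it a flat static penalty is subtracted as well.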
        if number_of_possible_trades > 0:
            reward *= (1 - self.__trading_consts.PENALTY_FUNCTION(self.__trading_data.no_trades_placed_for)) \
                if reward > 0 else 1
            if self.__trading_consts.PENALTY_STOPS < self.__trading_data.no_trades_placed_for:
                reward -= self.__trading_consts.STATIC_REWARD_ADJUSTMENT
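
        # The episode ends when the data is exhausted, the budget grows tenfold,
        # or total equity (cash plus invested) drops below 80% of the initial budget.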
        if (self.current_iteration >= len(self.__data) or
                self.__trading_data.current_budget > 10 * self.__trading_consts.INITIAL_BUDGET or
                (self.__trading_data.current_budget + self.__trading_data.currently_invested) / self.__trading_consts.INITIAL_BUDGET < 0.8):
            done = True
        else:
            done = False

        info = {'coeff': stock_change_coeff,
                'iteration': self.current_iteration,
                'number_of_closed_orders': len(closed_orders),
                'money_to_trade': money_to_trade,
                'action': action,
                'current_budget': self.__trading_data.current_budget,
                'currently_invested': self.__trading_data.currently_invested,
                'no_trades_placed_for': self.__trading_data.no_trades_placed_for,
                'currently_placed_trades': self.__trading_data.currently_placed_trades}

        return self.state, reward, done, info

    def render(self) -> None:
        """
        Renders environment visualization. Will be implemented later.
        """

        # Visualization to be implemented
        pass

    def reset(self, randkey: Optional[int] = None) -> list[float]:
        """
        Resets the environment. Typically used when the environment is finished,
        i.e. when there are no more steps to be taken within the environment
        or the finish conditions are fulfilled.

        Parameters:
            randkey (Optional[int]): Value indicating which iteration
                should be treated as the starting point after the reset.

        Returns:
            (list[float]): Current iteration observation state.
        """

        if randkey is None:
            randkey = random.randint(self.__trading_consts.WINDOW_SIZE, len(self.__data) - 1)
        self.__trading_data.current_budget = self.__trading_consts.INITIAL_BUDGET
        self.__trading_data.currently_invested = 0
        self.__trading_data.no_trades_placed_for = 0
        self.__trading_data.currently_placed_trades = 0
        self.__broker.reset()
        self.current_iteration = randkey
        self.state = self.__prepare_state_data()

        return self.state
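

# ---------------------------------------------------------------------------
# Minimal usage sketch (illustrative only, not part of the coverage-measured
# module). It assumes a CSV file with numeric columns including 'close' and a
# concrete RewardValidatorBase implementation; the path 'data/stock.csv' and
# the validator class name below are placeholders, not real project names.
#
# if __name__ == '__main__':
#     from .points_reward_validator import PointsRewardValidator  # hypothetical
#
#     env = TradingEnvironment(data_path='data/stock.csv',
#                              initial_budget=10000.0,
#                              max_amount_of_trades=5,
#                              window_size=30,
#                              validator=PointsRewardValidator(),
#                              sell_stop_loss=0.9, sell_take_profit=1.1,
#                              buy_stop_loss=0.9, buy_take_profit=1.1)
#     state = env.reset()
#     done = False
#     while not done:
#         state, reward, done, info = env.step(env.action_space.sample())
#         print(info['iteration'], reward)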