Coverage for source/environment/trading_environment.py: 97%

119 statements  

coverage.py v7.8.0, created at 2025-05-30 15:13 +0000

# environment/trading_environment.py

from gym import Env
from gym.spaces import Discrete, Box
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
import math
import random
from types import SimpleNamespace
from typing import Optional
import copy

from .broker import Broker
from .reward_validator_base import RewardValidatorBase

class TradingEnvironment(Env):
    """
    Implements a stock market environment in which an actor can perform actions (place orders).
    It is used to train neural network models with a reinforcement learning approach. It can be
    configured to award points and impose penalties in several ways.
    """

    def __init__(self, data_path: str, initial_budget: float, max_amount_of_trades: int, window_size: int,
                 validator: RewardValidatorBase, sell_stop_loss: float, sell_take_profit: float,
                 buy_stop_loss: float, buy_take_profit: float, penalty_starts: int = 0, penalty_stops: int = 10,
                 static_reward_adjustment: float = 1) -> None:
        """
        Class constructor. Allows all crucial constants, reward validation methods,
        environmental penalty policies, etc. to be defined.

        Parameters:
            data_path (str): Path to the CSV data that should be used as the environmental stock market.
            initial_budget (float): Initial budget for the trader to start from.
            max_amount_of_trades (int): Maximum number of trades that can be ongoing at the same time.
                Setting this constant prevents traders from placing orders randomly and defines
                the amount of money that can be assigned to a single order at a given iteration.
            window_size (int): Constant defining how far into the past the trader can look
                at a given iteration.
            validator (RewardValidatorBase): Validator implementing the policy used to award points
                for closed trades.
            sell_stop_loss (float): Constant defining the losing boundary at which a sell order
                (short) is closed.
            sell_take_profit (float): Constant defining the winning boundary at which a sell order
                (short) is closed.
            buy_stop_loss (float): Constant defining the losing boundary at which a buy order
                (long) is closed.
            buy_take_profit (float): Constant defining the winning boundary at which a buy order
                (long) is closed.
            penalty_starts (int): Constant defining how many trading periods the trader can go
                without placing an order before a penalty is imposed. In the range between the
                start and stop constants the penalty is calculated as a fraction of the positive
                reward and subtracted from the actual reward.
            penalty_stops (int): Constant defining the trading period at which the penalty stops
                growing. The reward for trading periods exceeding it equals minus the static
                reward adjustment.
            static_reward_adjustment (float): Constant used to penalize the trader for bad
                choices or reward it for good ones.
        """

        self.__data: pd.DataFrame = pd.read_csv(data_path)
        self.__broker: Broker = Broker()
        self.__validator: RewardValidatorBase = validator

        self.__trading_data: SimpleNamespace = SimpleNamespace()
        self.__trading_data.current_budget: float = initial_budget
        self.__trading_data.currently_invested: float = 0
        self.__trading_data.no_trades_placed_for: int = 0
        self.__trading_data.currently_placed_trades: int = 0

        self.__trading_consts = SimpleNamespace()
        self.__trading_consts.INITIAL_BUDGET: float = initial_budget
        self.__trading_consts.MAX_AMOUNT_OF_TRADES: int = max_amount_of_trades
        self.__trading_consts.WINDOW_SIZE: int = window_size
        self.__trading_consts.SELL_STOP_LOSS: float = sell_stop_loss
        self.__trading_consts.SELL_TAKE_PROFIT: float = sell_take_profit
        self.__trading_consts.BUY_STOP_LOSS: float = buy_stop_loss
        self.__trading_consts.BUY_TAKE_PROFIT: float = buy_take_profit
        self.__trading_consts.STATIC_REWARD_ADJUSTMENT: float = static_reward_adjustment
        self.__trading_consts.PENALTY_STARTS: int = penalty_starts
        self.__trading_consts.PENALTY_STOPS: int = penalty_stops
        self.__trading_consts.PROFITABILITY_FUNCTION = lambda x: -1.0 * math.exp(-x + 1) + 1
        self.__trading_consts.PENALTY_FUNCTION = lambda x: \
            min(1, 1 - math.tanh(-3.0 * (x - penalty_stops) / (penalty_stops - penalty_starts)))
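
        # PROFITABILITY_FUNCTION(x) = 1 - e^(1 - x): it is 0 when the budget is unchanged (x = 1),
        # approaches +1 asymptotically for gains and falls off steeply for losses.
        # PENALTY_FUNCTION ramps tanh-like from ~0 at PENALTY_STARTS to 1 at PENALTY_STOPS
        # and stays capped at 1 for longer streaks without a placed order.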

        self.current_iteration: int = self.__trading_consts.WINDOW_SIZE
        self.state: list[float] = self.__prepare_state_data()
        self.action_space: Discrete = Discrete(3)
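        # Bounds of +/-3 fit the StandardScaler'ed (z-scored) market features,
        # which rarely exceed three standard deviations.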
        self.observation_space: Box = Box(low = np.ones(len(self.state)) * -3,
                                          high = np.ones(len(self.state)) * 3,
                                          dtype=np.float64)

    def __prepare_state_data(self) -> list[float]:
        """
        Calculates state data as a list of floats representing the current iteration's observation.
        The observation contains all input data trimmed to the window size, plus a couple of
        coefficients giving insight into the current budget and orders situation.

        Returns:
            (list[float]): List with current observations for the environment.
        """

        current_market_data = self.__data.iloc[self.current_iteration - self.__trading_consts.WINDOW_SIZE : self.current_iteration]
        current_market_data_no_index = current_market_data.select_dtypes(include = [np.number])
        normalized_current_market_data_values = pd.DataFrame(StandardScaler().fit_transform(current_market_data_no_index),
                                                             columns = current_market_data_no_index.columns).values
        current_market_data_list = normalized_current_market_data_values.ravel().tolist()

        current_normalized_budget = 1.0 * self.__trading_data.current_budget / self.__trading_consts.INITIAL_BUDGET
        current_profitability_coeff = self.__trading_consts.PROFITABILITY_FUNCTION(current_normalized_budget)
        current_trades_occupancy_coeff = 1.0 * self.__trading_data.currently_placed_trades / self.__trading_consts.MAX_AMOUNT_OF_TRADES
        current_no_trades_penalty_coeff = self.__trading_consts.PENALTY_FUNCTION(self.__trading_data.no_trades_placed_for)
        current_inner_state_list = [current_profitability_coeff, current_trades_occupancy_coeff, current_no_trades_penalty_coeff]
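
        # Resulting state layout: WINDOW_SIZE * <number of numeric columns> z-scored market
        # values, followed by the three scalar coefficients computed above.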
        return current_market_data_list + current_inner_state_list

    def get_trading_data(self) -> SimpleNamespace:
        """
        Trading data getter.

        Returns:
            (SimpleNamespace): Copy of the namespace with all trading data.
        """

        return copy.copy(self.__trading_data)

    def get_trading_consts(self) -> SimpleNamespace:
        """
        Trading constants getter.

        Returns:
            (SimpleNamespace): Copy of the namespace with all trading constants.
        """

        return copy.copy(self.__trading_consts)

    def get_broker(self) -> Broker:
        """
        Broker getter.

        Returns:
            (Broker): Copy of the broker used by the environment.
        """

        return copy.copy(self.__broker)

    def get_environment_length(self) -> int:
        """
        Environment length getter.

        Returns:
            (int): Length of the environment data.
        """

        return len(self.__data)

    def get_environment_spatial_data_dimension(self) -> tuple[int, int]:
        """
        Environment spatial data dimensionality getter.

        Returns:
            (tuple[int, int]): Dimensions of the spatial data in the environment,
                i.e. (window size, number of data columns excluding the index column).
        """

        return (self.__trading_consts.WINDOW_SIZE, self.__data.shape[1] - 1)

    def get_data_for_iteration(self, columns: list[str], start: int, stop: int, step: int = 1) -> list[float]:
        """
        Getter for data from the specified columns over a range of iterations.

        Returns:
            (list[float]): Copy of the selected part of the data, flattened
                over the specified iterations.
        """

        return copy.copy(self.__data.loc[start:stop:step, columns].values.ravel().tolist())

    def step(self, action: int) -> tuple[list[float], float, bool, dict]:
        """
        Performs the specified action on the environment, which results in the generation of a
        new observation. This function causes trades to be handled, the reward to be calculated
        and the environment to be updated.

        Parameters:
            action (int): Number specifying the action. Possible values are 0 for the buy action,
                1 for the wait action and 2 for the sell action.

        Returns:
            (tuple[list[float], float, bool, dict]): Tuple containing the next observation
                state, reward, finish indication and an additional info dictionary.
        """

        self.current_iteration += 1
        self.state = self.__prepare_state_data()

        close_changes = self.__data.iloc[self.current_iteration - 2 : self.current_iteration]['close'].values
        stock_change_coeff = 1 + (close_changes[1] - close_changes[0]) / close_changes[0]
        closed_orders = self.__broker.update_orders(stock_change_coeff)
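
        # Settle the trades the broker closed this step: score them with the reward validator,
        # return their current value to the budget and free their trade slots.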
        reward = self.__validator.validate_orders(closed_orders)
        self.__trading_data.currently_placed_trades -= len(closed_orders)
        self.__trading_data.current_budget += np.sum([trade.current_value for trade in closed_orders])
        self.__trading_data.currently_invested -= np.sum([trade.initial_value for trade in closed_orders])
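
        # Each new trade receives an equal share of the free budget, split across
        # the remaining open-trade slots.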
        number_of_possible_trades = self.__trading_consts.MAX_AMOUNT_OF_TRADES - self.__trading_data.currently_placed_trades
        money_to_trade = 0
        if number_of_possible_trades > 0:
            money_to_trade = 1.0 / number_of_possible_trades * self.__trading_data.current_budget
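
        # Map the action to an order type and its stop-loss/take-profit boundaries
        # (0 = buy/long, 2 = sell/short; action 1 means wait and places no order).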
        if action == 0:
            is_buy_order = True
            stop_loss = self.__trading_consts.BUY_STOP_LOSS
            take_profit = self.__trading_consts.BUY_TAKE_PROFIT
        elif action == 2:
            is_buy_order = False
            stop_loss = self.__trading_consts.SELL_STOP_LOSS
            take_profit = self.__trading_consts.SELL_TAKE_PROFIT

        if action != 1:
            if number_of_possible_trades > 0:
                self.__trading_data.current_budget -= money_to_trade
                self.__trading_data.currently_invested += money_to_trade
                self.__broker.place_order(money_to_trade, is_buy_order, stop_loss, take_profit)
                self.__trading_data.currently_placed_trades += 1
                self.__trading_data.no_trades_placed_for = 0
                reward += self.__trading_consts.STATIC_REWARD_ADJUSTMENT
            else:
                self.__trading_data.no_trades_placed_for += 1
                reward -= self.__trading_consts.STATIC_REWARD_ADJUSTMENT
        else:
            self.__trading_data.no_trades_placed_for += 1
            if number_of_possible_trades == 0:
                reward += self.__trading_consts.STATIC_REWARD_ADJUSTMENT
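
        # Scale any positive reward down by the no-trade penalty ramp while slots are free;
        # beyond PENALTY_STOPS an additional flat static adjustment is subtracted.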
        if number_of_possible_trades > 0:
            reward *= (1 - self.__trading_consts.PENALTY_FUNCTION(self.__trading_data.no_trades_placed_for)) \
                if reward > 0 else 1
            if self.__trading_consts.PENALTY_STOPS < self.__trading_data.no_trades_placed_for:
                reward -= self.__trading_consts.STATIC_REWARD_ADJUSTMENT
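
        # The episode ends when the data is exhausted, the budget grows tenfold,
        # or total equity (budget plus invested money) drops below 80% of the initial budget.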
        if (self.current_iteration >= len(self.__data) or
                self.__trading_data.current_budget > 10 * self.__trading_consts.INITIAL_BUDGET or
                (self.__trading_data.current_budget + self.__trading_data.currently_invested) / self.__trading_consts.INITIAL_BUDGET < 0.8):
            done = True
        else:
            done = False

        info = {'coeff': stock_change_coeff,
                'iteration': self.current_iteration,
                'number_of_closed_orders': len(closed_orders),
                'money_to_trade': money_to_trade,
                'action': action,
                'current_budget': self.__trading_data.current_budget,
                'currently_invested': self.__trading_data.currently_invested,
                'no_trades_placed_for': self.__trading_data.no_trades_placed_for,
                'currently_placed_trades': self.__trading_data.currently_placed_trades}

        return self.state, reward, done, info

    def render(self) -> None:
        """
        Renders environment visualization. Will be implemented later.
        """

        # Visualization to be implemented
        pass

    def reset(self, randkey: Optional[int] = None) -> list[float]:
        """
        Resets the environment. Typically used when the environment is finished,
        i.e. when there are no more steps to be taken within the environment
        or the finish conditions are fulfilled.

        Parameters:
            randkey (Optional[int]): Value indicating which iteration
                should be treated as the starting point after the reset.

        Returns:
            (list[float]): Current iteration observation state.
        """
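
        # Without an explicit randkey, restart from a random in-range iteration so that
        # consecutive episodes see different slices of the market data.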
        if randkey is None:
            randkey = random.randint(self.__trading_consts.WINDOW_SIZE, len(self.__data) - 1)
        self.__trading_data.current_budget = self.__trading_consts.INITIAL_BUDGET
        self.__trading_data.currently_invested = 0
        self.__trading_data.no_trades_placed_for = 0
        self.__trading_data.currently_placed_trades = 0
        self.__broker.reset()
        self.current_iteration = randkey
        self.state = self.__prepare_state_data()

        return self.state
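
A minimal usage sketch (not part of the covered module), assuming a CSV file with a numeric 'close' column and a hypothetical RewardValidatorBase subclass named ProfitRewardValidator; the import paths, file names and constructor values below are placeholders:

    from environment.trading_environment import TradingEnvironment
    from environment.profit_reward_validator import ProfitRewardValidator  # hypothetical validator

    env = TradingEnvironment(data_path='data/stock.csv',      # assumed CSV with a 'close' column
                             initial_budget=10000.0,
                             max_amount_of_trades=5,
                             window_size=10,
                             validator=ProfitRewardValidator(),
                             sell_stop_loss=0.95, sell_take_profit=1.05,
                             buy_stop_loss=0.95, buy_take_profit=1.05)

    state = env.reset()
    done = False
    while not done:
        action = env.action_space.sample()                    # random policy: 0 = buy, 1 = wait, 2 = sell
        state, reward, done, info = env.step(action)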