Coverage for source/environment/trading_environment.py: 93%

134 statements  


# environment/trading_environment.py

from gym import Env
from gym.spaces import Discrete, Box
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
import math
import random
from types import SimpleNamespace
from typing import Optional
import copy

from .broker import Broker
from .reward_validator_base import RewardValidatorBase

class TradingEnvironment(Env):
    """
    Implements a stock market environment in which an actor can perform actions (place orders).
    It is used to train neural network models with a reinforcement learning approach and can be
    configured to award points and impose penalties in several ways.
    """

    TRAIN_MODE = 'train'
    TEST_MODE = 'test'

    def __init__(self, data_path: str, initial_budget: float, max_amount_of_trades: int, window_size: int,
                 validator: RewardValidatorBase, sell_stop_loss: float, sell_take_profit: float,
                 buy_stop_loss: float, buy_take_profit: float, test_ratio: float = 0.2, penalty_starts: int = 0,
                 penalty_stops: int = 10, static_reward_adjustment: float = 1) -> None:
        """
        Class constructor. Allows all crucial constants, reward validation methods,
        environmental penalty policies, etc. to be defined.

        Parameters:
            data_path (str): Path to the CSV data that should be used as the environmental stock market.
            initial_budget (float): Initial budget for the trader to start from.
            max_amount_of_trades (int): Maximum number of trades that can be ongoing at the same time.
                Setting this constant prevents traders from placing orders randomly and defines
                the amount of money that can be assigned to a single order at a given iteration.
            window_size (int): Constant defining how far into the past the trader can look
                at a given iteration.
            validator (RewardValidatorBase): Validator implementing the policy used to award points
                for closed trades.
            sell_stop_loss (float): Constant defining the losing boundary at which a sell order
                (short) is closed.
            sell_take_profit (float): Constant defining the winning boundary at which a sell order
                (short) is closed.
            buy_stop_loss (float): Constant defining the losing boundary at which a buy order
                (long) is closed.
            buy_take_profit (float): Constant defining the winning boundary at which a buy order
                (long) is closed.
            test_ratio (float): Ratio of the data that should be used for testing purposes.
            penalty_starts (int): Constant defining how many trading periods the trader can go without
                placing an order before a penalty is imposed. In the range between the start and stop
                constants the penalty is calculated as a percentage of a positive reward and subtracted
                from the actual reward.
            penalty_stops (int): Constant defining the trading period at which the penalty no longer
                increases. The reward for trading periods exceeding the penalty stop constant will
                equal minus the static reward adjustment.
            static_reward_adjustment (float): Constant used to penalize the trader for bad choices or
                reward it for good ones.
        """

        if test_ratio < 0.0 or test_ratio >= 1.0:
            raise ValueError(f"Invalid test_ratio: {test_ratio}. It should be in range [0, 1).")

        self.__data: dict[str, pd.DataFrame] = self.__load_data(data_path, test_ratio)
        self.__mode = TradingEnvironment.TRAIN_MODE
        self.__broker: Broker = Broker()
        self.__validator: RewardValidatorBase = validator

        self.__trading_data: SimpleNamespace = SimpleNamespace()
        self.__trading_data.current_budget: float = initial_budget
        self.__trading_data.currently_invested: float = 0
        self.__trading_data.no_trades_placed_for: int = 0
        self.__trading_data.currently_placed_trades: int = 0

        self.__trading_consts = SimpleNamespace()
        self.__trading_consts.INITIAL_BUDGET: float = initial_budget
        self.__trading_consts.MAX_AMOUNT_OF_TRADES: int = max_amount_of_trades
        self.__trading_consts.WINDOW_SIZE: int = window_size
        self.__trading_consts.SELL_STOP_LOSS: float = sell_stop_loss
        self.__trading_consts.SELL_TAKE_PROFIT: float = sell_take_profit
        self.__trading_consts.BUY_STOP_LOSS: float = buy_stop_loss
        self.__trading_consts.BUY_TAKE_PROFIT: float = buy_take_profit
        self.__trading_consts.STATIC_REWARD_ADJUSTMENT: float = static_reward_adjustment
        self.__trading_consts.PENALTY_STARTS: int = penalty_starts
        self.__trading_consts.PENALTY_STOPS: int = penalty_stops
        # Equals 0 at break-even (normalized budget of 1) and approaches 1 as the budget grows.
        self.__trading_consts.PROFITABILITY_FUNCTION = lambda x: -1.0 * math.exp(-x + 1) + 1
        # Ramps from roughly 0 at penalty_starts up to 1 at penalty_stops, capped at 1 beyond that.
        self.__trading_consts.PENALTY_FUNCTION = lambda x: \
            min(1, 1 - math.tanh(-3.0 * (x - penalty_stops) / (penalty_stops - penalty_starts)))

        self.current_iteration: int = self.__trading_consts.WINDOW_SIZE
        self.state: list[float] = self.__prepare_state_data()
        self.action_space: Discrete = Discrete(3)
        self.observation_space: Box = Box(low=np.ones(len(self.state)) * -3,
                                          high=np.ones(len(self.state)) * 3,
                                          dtype=np.float64)
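
    # A minimal construction sketch (illustrative only; 'data/stock.csv' and
    # SomePointsValidator are placeholder names, not part of this module):
    #
    #     validator = SomePointsValidator()
    #     env = TradingEnvironment(data_path='data/stock.csv', initial_budget=10000.0,
    #                              max_amount_of_trades=5, window_size=10, validator=validator,
    #                              sell_stop_loss=0.95, sell_take_profit=1.05,
    #                              buy_stop_loss=0.95, buy_take_profit=1.05)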

    def __load_data(self, data_path: str, test_size: float) -> dict[str, pd.DataFrame]:
        """
        Loads data from a CSV file and splits it into training and testing sets based on the
        specified test size ratio.

        Parameters:
            data_path (str): Path to the CSV file containing the stock market data.
            test_size (float): Ratio of the data to be used for testing.

        Returns:
            (dict[str, pd.DataFrame]): Dictionary containing training and testing data frames.
        """

        data_frame = pd.read_csv(data_path)
        dividing_index = int(len(data_frame) * (1 - test_size))

        return {
            TradingEnvironment.TRAIN_MODE: data_frame.iloc[:dividing_index],
            TradingEnvironment.TEST_MODE: data_frame.iloc[dividing_index:]
        }
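
    # Note: the split is chronological (no shuffling), so the test set is always the
    # most recent (test_size) fraction of the rows.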

    def __prepare_state_data(self) -> list[float]:
        """
        Calculates state data as a list of floats representing the current iteration's observation.
        An observation contains all input data restricted to the window size, plus a couple of
        coefficients giving insight into the current budget and orders situation.

        Returns:
            (list[float]): List with current observations for the environment.
        """

        current_market_data = self.__data[self.__mode].iloc[self.current_iteration - self.__trading_consts.WINDOW_SIZE : self.current_iteration]
        current_market_data_no_index = current_market_data.select_dtypes(include=[np.number])
        normalized_current_market_data_values = pd.DataFrame(StandardScaler().fit_transform(current_market_data_no_index),
                                                             columns=current_market_data_no_index.columns).values
        current_market_data_list = normalized_current_market_data_values.ravel().tolist()

        current_normalized_budget = 1.0 * self.__trading_data.current_budget / self.__trading_consts.INITIAL_BUDGET
        current_profitability_coeff = self.__trading_consts.PROFITABILITY_FUNCTION(current_normalized_budget)
        current_trades_occupancy_coeff = 1.0 * self.__trading_data.currently_placed_trades / self.__trading_consts.MAX_AMOUNT_OF_TRADES
        current_no_trades_penalty_coeff = self.__trading_consts.PENALTY_FUNCTION(self.__trading_data.no_trades_placed_for)
        current_inner_state_list = [current_profitability_coeff, current_trades_occupancy_coeff, current_no_trades_penalty_coeff]

        return current_market_data_list + current_inner_state_list
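
    # The resulting observation is a flat list of WINDOW_SIZE * <numeric columns> normalized
    # market values followed by the three coefficients above; e.g. (assumed shapes) a window
    # of 10 rows over 5 numeric columns yields a state of length 10 * 5 + 3 = 53.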

    def set_mode(self, mode: str) -> None:
        """
        Sets the mode of the environment to either TRAIN_MODE or TEST_MODE.

        Parameters:
            mode (str): Mode to set for the environment.

        Raises:
            ValueError: If the provided mode is not valid.
        """

        if mode not in [TradingEnvironment.TRAIN_MODE, TradingEnvironment.TEST_MODE]:
            raise ValueError(f"Invalid mode: {mode}. Use TradingEnvironment.TRAIN_MODE or TradingEnvironment.TEST_MODE.")
        self.__mode = mode
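
    # e.g. env.set_mode(TradingEnvironment.TEST_MODE) switches subsequent steps
    # to the held-out testing split.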

    def get_mode(self) -> str:
        """
        Mode getter.

        Returns:
            (str): Current mode of the environment.
        """

        return copy.copy(self.__mode)

    def get_trading_data(self) -> SimpleNamespace:
        """
        Trading data getter.

        Returns:
            (SimpleNamespace): Copy of the namespace with all trading data.
        """

        return copy.copy(self.__trading_data)

    def get_trading_consts(self) -> SimpleNamespace:
        """
        Trading constants getter.

        Returns:
            (SimpleNamespace): Copy of the namespace with all trading constants.
        """

        return copy.copy(self.__trading_consts)

    def get_broker(self) -> Broker:
        """
        Broker getter.

        Returns:
            (Broker): Copy of the broker used by the environment.
        """

        return copy.copy(self.__broker)

    def get_environment_length(self) -> int:
        """
        Environment length getter.

        Returns:
            (int): Length of the environment.
        """

        return len(self.__data[self.__mode])

    def get_environment_spatial_data_dimension(self) -> tuple[int, int]:
        """
        Environment spatial data dimensionality getter.

        Returns:
            (tuple[int, int]): Dimensions of spatial data in the environment, i.e. the
                window size and the number of data columns minus one (the index column).
        """

        return (self.__trading_consts.WINDOW_SIZE, self.__data[self.__mode].shape[1] - 1)

    def get_data_for_iteration(self, columns: list[str], start: int, stop: int, step: int = 1) -> list[float]:
        """
        Getter for data over specified iterations.

        Parameters:
            columns (list[str]): List of column names to extract from the data.
            start (int): Start iteration index.
            stop (int): Stop iteration index (inclusive, since pandas label-based slicing is used).
            step (int): Step between iterations. Default is 1.

        Returns:
            (list[float]): Copy of the part of the data with the specified columns
                over the specified iterations.
        """

        return copy.copy(self.__data[self.__mode].loc[start:stop:step, columns].values.ravel().tolist())
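
    # For example (assuming the default integer index), get_data_for_iteration(['close'], 100, 110)
    # returns the eleven 'close' values for rows labelled 100 through 110, because .loc
    # slicing includes the stop label.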

    def step(self, action: int) -> tuple[list[float], float, bool, dict]:
        """
        Performs the specified action on the environment, which results in the generation of a
        new observation. This function causes trades to be handled, the reward to be calculated
        and the environment to be updated.

        Parameters:
            action (int): Number specifying the action. Possible values are 0 for the buy action,
                1 for the wait action and 2 for the sell action.

        Returns:
            (tuple[list[float], float, bool, dict]): Tuple containing the next observation
                state, the reward, a finish indication and an additional info dictionary.
        """

        self.current_iteration += 1
        self.state = self.__prepare_state_data()

        # Ratio of the current close to the previous one, used to update open orders.
        close_changes = self.__data[self.__mode].iloc[self.current_iteration - 2 : self.current_iteration]['close'].values
        stock_change_coeff = 1 + (close_changes[1] - close_changes[0]) / close_changes[0]
        closed_orders = self.__broker.update_orders(stock_change_coeff)

        reward = self.__validator.validate_orders(closed_orders)
        self.__trading_data.currently_placed_trades -= len(closed_orders)
        self.__trading_data.current_budget += np.sum([trade.current_value for trade in closed_orders])
        self.__trading_data.currently_invested -= np.sum([trade.initial_value for trade in closed_orders])

        number_of_possible_trades = self.__trading_consts.MAX_AMOUNT_OF_TRADES - self.__trading_data.currently_placed_trades
        money_to_trade = 0
        if number_of_possible_trades > 0:
            money_to_trade = 1.0 / number_of_possible_trades * self.__trading_data.current_budget

        # Long (buy) orders close on the buy-side boundaries; short (sell) orders on the sell-side ones.
        if action == 0:
            is_buy_order = True
            stop_loss = self.__trading_consts.BUY_STOP_LOSS
            take_profit = self.__trading_consts.BUY_TAKE_PROFIT
        elif action == 2:
            is_buy_order = False
            stop_loss = self.__trading_consts.SELL_STOP_LOSS
            take_profit = self.__trading_consts.SELL_TAKE_PROFIT

        if action != 1:
            if number_of_possible_trades > 0:
                self.__trading_data.current_budget -= money_to_trade
                self.__trading_data.currently_invested += money_to_trade
                self.__broker.place_order(money_to_trade, is_buy_order, stop_loss, take_profit)
                self.__trading_data.currently_placed_trades += 1
                self.__trading_data.no_trades_placed_for = 0
                reward += self.__trading_consts.STATIC_REWARD_ADJUSTMENT
            else:
                self.__trading_data.no_trades_placed_for += 1
                reward -= self.__trading_consts.STATIC_REWARD_ADJUSTMENT
        else:
            self.__trading_data.no_trades_placed_for += 1
            if number_of_possible_trades == 0:
                reward += self.__trading_consts.STATIC_REWARD_ADJUSTMENT

        if number_of_possible_trades > 0:
            # Scale positive rewards down the longer the trader stays inactive.
            reward *= (1 - self.__trading_consts.PENALTY_FUNCTION(self.__trading_data.no_trades_placed_for)) \
                if reward > 0 else 1
            if self.__trading_consts.PENALTY_STOPS < self.__trading_data.no_trades_placed_for:
                reward -= self.__trading_consts.STATIC_REWARD_ADJUSTMENT
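
        # For example, with penalty_starts=0 and penalty_stops=10 (assumed values), five idle
        # periods give PENALTY_FUNCTION(5) = 1 - tanh(1.5) ≈ 0.095, so a positive reward keeps
        # roughly 90% of its value; at ten or more idle periods it is wiped out entirely.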

        # An episode ends when the data runs out, the budget grows tenfold,
        # or total funds drop below 80% of the initial budget.
        if (self.current_iteration >= len(self.__data[self.__mode]) or
                self.__trading_data.current_budget > 10 * self.__trading_consts.INITIAL_BUDGET or
                (self.__trading_data.current_budget + self.__trading_data.currently_invested) / self.__trading_consts.INITIAL_BUDGET < 0.8):
            done = True
        else:
            done = False

        info = {'coeff': stock_change_coeff,
                'iteration': self.current_iteration,
                'number_of_closed_orders': len(closed_orders),
                'money_to_trade': money_to_trade,
                'action': action,
                'current_budget': self.__trading_data.current_budget,
                'currently_invested': self.__trading_data.currently_invested,
                'no_trades_placed_for': self.__trading_data.no_trades_placed_for,
                'currently_placed_trades': self.__trading_data.currently_placed_trades}

        return self.state, reward, done, info

    def render(self) -> None:
        """
        Renders environment visualization. Will be implemented later.
        """

        # TODO: Visualization to be implemented
        pass

    def reset(self, randkey: Optional[int] = None) -> list[float]:
        """
        Resets the environment. Typically used when the environment is finished,
        i.e. when there are no more steps to be taken within the environment
        or the finish conditions are fulfilled.

        Parameters:
            randkey (Optional[int]): Value indicating which iteration
                should be treated as the starting point after the reset.

        Returns:
            (list[float]): Current iteration observation state.
        """

        if randkey is None:
            randkey = random.randint(self.__trading_consts.WINDOW_SIZE, len(self.__data[self.__mode]) - 1)
        self.__trading_data.current_budget = self.__trading_consts.INITIAL_BUDGET
        self.__trading_data.currently_invested = 0
        self.__trading_data.no_trades_placed_for = 0
        self.__trading_data.currently_placed_trades = 0
        self.__broker.reset()
        self.current_iteration = randkey
        self.state = self.__prepare_state_data()

        return self.state
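
# A minimal episode-loop sketch (illustrative only; assumes `env` was constructed as in
# the example near __init__ and uses the classic gym step()/reset() API shown above):
#
#     state = env.reset()
#     done = False
#     while not done:
#         action = env.action_space.sample()  # stand-in for a trained policy
#         state, reward, done, info = env.step(action)
#     print(info['current_budget'])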