Skip to content

Commit

Permalink
Enhance target engineering
Browse files Browse the repository at this point in the history
  • Loading branch information
Francisco Silva committed Feb 18, 2025
1 parent 56ba0b0 commit b90dc79
Show file tree
Hide file tree
Showing 19 changed files with 202 additions and 420 deletions.
2 changes: 1 addition & 1 deletion notebooks/eda.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -74349,7 +74349,7 @@
],
"metadata": {
"kernelspec": {
"display_name": "stocksense",
"display_name": ".venv",
"language": "python",
"name": "python3"
},
Expand Down
147 changes: 49 additions & 98 deletions notebooks/mock_data.ipynb

Large diffs are not rendered by default.

218 changes: 0 additions & 218 deletions notebooks/portfolio_analysis.ipynb

This file was deleted.

1 change: 1 addition & 0 deletions stocksense/config/defaults/model_config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,7 @@
'targets':
- 'aggressive_hit'
- 'moderate_hit'
- 'relaxed_hit'
'id_col': 'tic'
'date_col': 'tdq'
'max_train_years': 10
Expand Down
4 changes: 4 additions & 0 deletions stocksense/database/handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,10 @@ def delete_financials(self, tic: str) -> None:
conn = self.db.get_connection()
delete_data(conn, "financial", {"tic": tic})

# Obtain a connection from self.db and hand it to delete_data together with
# the string "market" and the mapping {"tic": tic}; nothing is returned.
# NOTE(review): presumably "market" names the table and {"tic": tic} is the row
# filter, so this would remove the market rows stored for that ticker —
# confirm against delete_data, which is not visible in this diff.
def delete_market_data(self, tic: str) -> None:
conn = self.db.get_connection()
delete_data(conn, "market", {"tic": tic})

def update_stock(self, tic: str, update_values: dict) -> None:
conn = self.db.get_connection()
update_data(conn, "stock", update_values, {"tic": tic})
Expand Down
5 changes: 1 addition & 4 deletions stocksense/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@

from stocksense import __version__
from stocksense.config import config
from stocksense.database import DatabaseHandler
from stocksense.model import ModelHandler, PortfolioBuilder
from stocksense.pipeline import ETL, clean, engineer_features

Expand Down Expand Up @@ -86,10 +85,8 @@ def portfolio(trade_date: dt.datetime, weighting: str, n_stocks: int):
"""Build investment portfolio for a specific trade date."""

data = prepare_data()
constituents = DatabaseHandler().fetch_constituents(trade_date)

handler = ModelHandler(trade_date)
ranks = handler.score(data, constituents)
ranks = handler.score(data)

portfolio = PortfolioBuilder(weighting)
portfolio.build_portfolio(n_stocks, trade_date, ranks)
Expand Down
21 changes: 8 additions & 13 deletions stocksense/model/model_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,6 @@ def train(self, data: pl.DataFrame, retrain: bool = False) -> None:
trade_date_model_dir = MODEL_DIR / f"{self.trade_date.date()}"
trade_date_model_dir.mkdir(parents=True, exist_ok=True)
model_file = trade_date_model_dir / f"{target}.pkl"

if model_file.exists() and not retrain:
logger.warning(f"Model already exists for {target}, {self.trade_date}")
continue
Expand All @@ -70,8 +69,7 @@ def train(self, data: pl.DataFrame, retrain: bool = False) -> None:
(~pl.all_horizontal(pl.col(target).is_null()))
).select(["tdq", "tic"] + self.features + [target])

best_params = self.optimize(train, self.features, target)

best_params = self._optimize(train, self.features, target)
train = train.filter(pl.col("tdq") > start_date)
final_params = format_xgboost_params(best_params, 100)

Expand All @@ -80,7 +78,6 @@ def train(self, data: pl.DataFrame, retrain: bool = False) -> None:

X_train = train.select(self.features).to_pandas()
y_train = train.select(target).to_pandas().values.ravel()

model = XGBoostClassifier(final_params)
model.train(X_train, y_train)
model.save_model(model_file)
Expand All @@ -90,7 +87,7 @@ def train(self, data: pl.DataFrame, retrain: bool = False) -> None:
logger.error(f"ERROR: failed to train model - {e}")
raise

def optimize(
def _optimize(
self,
train: pl.DataFrame,
features: List[str],
Expand All @@ -108,7 +105,7 @@ def optimize(
target : str
Target variable to optimize model for.
"""
optimizer = OptunaOptimizer(n_trials=600)
optimizer = OptunaOptimizer(n_trials=500)
best_solution = optimizer.optimize(
train,
features,
Expand All @@ -117,16 +114,14 @@ def optimize(
)
return best_solution

def score(self, data: pl.DataFrame, stocks: list[str]) -> None:
def score(self, data: pl.DataFrame) -> None:
"""
Score stocks using rank-based ensemble of target-specific models.
Parameters
----------
data : pl.DataFrame
Preprocessed financial data.
stocks : list[str]
List of stocks to score.
Returns
-------
Expand All @@ -135,12 +130,12 @@ def score(self, data: pl.DataFrame, stocks: list[str]) -> None:
"""
try:
logger.info(f"START stocksense eval - {self.trade_date}")
test = data.filter((pl.col("tdq") == self.trade_date) & pl.col("tic").is_in(stocks))
test = data.filter((pl.col("tdq") == self.trade_date))
final_ranks = test.clone()
pred_cols = []
perc_cols = []

# Get predictions for each target
# Score along each target
for target in self.targets:
trade_date_model_dir = MODEL_DIR / f"{self.trade_date.date()}"
model_file = trade_date_model_dir / f"{target}.pkl"
Expand Down Expand Up @@ -174,14 +169,14 @@ def score(self, data: pl.DataFrame, stocks: list[str]) -> None:
).sort("avg_score", descending=True)

report_cols = ["tic", "adj_close", "max_return_4Q", "fwd_return_4Q", "avg_score"]
self.save_scoring_report(final_ranks.select(report_cols + pred_cols))
self._save_scoring_report(final_ranks.select(report_cols + pred_cols))

return final_ranks
except Exception as e:
logger.error(f"ERROR: failed to score stocks - {e}")
raise

def save_scoring_report(self, rank_data: pl.DataFrame) -> None:
def _save_scoring_report(self, rank_data: pl.DataFrame) -> None:
"""
Save scoring report csv with ranks for each target and average rank.
Expand Down
Loading

0 comments on commit b90dc79

Please sign in to comment.