Skip to content

Commit

Permalink
Reformat ipynbs
Browse files Browse the repository at this point in the history
  • Loading branch information
Francisco Silva committed Nov 25, 2024
1 parent 6238ed2 commit f9aad58
Show file tree
Hide file tree
Showing 6 changed files with 188 additions and 30 deletions.
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -173,3 +173,6 @@ log/

# model files
*.pkl

# reports
reports/report_*
41 changes: 20 additions & 21 deletions notebooks/modeling.ipynb

Large diffs are not rendered by default.

144 changes: 144 additions & 0 deletions notebooks/report_analysis.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,144 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"from pathlib import Path\n",
"\n",
"import polars as pl\n",
"\n",
"from stocksense.database_handler import DatabaseHandler\n",
"\n",
"REPORT_DIR = Path(\"../reports\")\n",
"\n",
"DATE = \"2023-06-01\""
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"\u001b[32m2024-11-25 15:15:00.626\u001b[0m | \u001b[32m\u001b[1mSUCCESS \u001b[0m | \u001b[36mstocksense.database_handler.schema\u001b[0m:\u001b[36mcreate_tables\u001b[0m:\u001b[36m122\u001b[0m - \u001b[32m\u001b[1mTables created successfully\u001b[0m\n"
]
},
{
"data": {
"text/html": [
"<div><style>\n",
".dataframe > thead > tr,\n",
".dataframe > tbody > tr {\n",
" text-align: right;\n",
" white-space: pre-wrap;\n",
"}\n",
"</style>\n",
"<small>shape: (467, 6)</small><table border=\"1\" class=\"dataframe\"><thead><tr><th>tic</th><th>score</th><th>name</th><th>sector</th><th>freturn</th><th>adj_freturn</th></tr><tr><td>str</td><td>f64</td><td>str</td><td>str</td><td>f64</td><td>f64</td></tr></thead><tbody><tr><td>&quot;PARA&quot;</td><td>0.762556</td><td>&quot;Paramount Global&quot;</td><td>&quot;Communication Services&quot;</td><td>-21.502723</td><td>-46.531975</td></tr><tr><td>&quot;FICO&quot;</td><td>0.691881</td><td>&quot;Fair Isaac&quot;</td><td>&quot;Information Technology&quot;</td><td>62.615358</td><td>37.586106</td></tr><tr><td>&quot;KEY&quot;</td><td>0.6825847</td><td>&quot;KeyCorp&quot;</td><td>&quot;Financials&quot;</td><td>59.400882</td><td>34.37163</td></tr><tr><td>&quot;FSLR&quot;</td><td>0.668008</td><td>&quot;First Solar&quot;</td><td>&quot;Information Technology&quot;</td><td>30.190669</td><td>5.161417</td></tr><tr><td>&quot;MPWR&quot;</td><td>0.654031</td><td>&quot;Monolithic Power Systems&quot;</td><td>&quot;Information Technology&quot;</td><td>48.461755</td><td>23.432502</td></tr><tr><td>&hellip;</td><td>&hellip;</td><td>&hellip;</td><td>&hellip;</td><td>&hellip;</td><td>&hellip;</td></tr><tr><td>&quot;DUK&quot;</td><td>0.148876</td><td>&quot;Duke Energy&quot;</td><td>&quot;Utilities&quot;</td><td>22.819521</td><td>-2.209732</td></tr><tr><td>&quot;PPL&quot;</td><td>0.1465794</td><td>&quot;PPL Corporation&quot;</td><td>&quot;Utilities&quot;</td><td>16.684222</td><td>-8.34503</td></tr><tr><td>&quot;ED&quot;</td><td>0.137104</td><td>&quot;Consolidated Edison&quot;</td><td>&quot;Utilities&quot;</td><td>6.764106</td><td>-18.265146</td></tr><tr><td>&quot;DTE&quot;</td><td>0.125249</td><td>&quot;DTE Energy&quot;</td><td>&quot;Utilities&quot;</td><td>12.616302</td><td>-12.41295</td></tr><tr><td>&quot;SO&quot;</td><td>0.11732</td><td>&quot;Southern Company&quot;</td><td>&quot;Utilities&quot;</td><td>20.565807</td><td>-4.463445</td></tr></tbody></table></div>"
],
"text/plain": [
"shape: (467, 6)\n",
"┌──────┬───────────┬──────────────────────────┬────────────────────────┬────────────┬─────────────┐\n",
"│ tic ┆ score ┆ name ┆ sector ┆ freturn ┆ adj_freturn │\n",
"│ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n",
"│ str ┆ f64 ┆ str ┆ str ┆ f64 ┆ f64 │\n",
"╞══════╪═══════════╪══════════════════════════╪════════════════════════╪════════════╪═════════════╡\n",
"│ PARA ┆ 0.762556 ┆ Paramount Global ┆ Communication Services ┆ -21.502723 ┆ -46.531975 │\n",
"│ FICO ┆ 0.691881 ┆ Fair Isaac ┆ Information Technology ┆ 62.615358 ┆ 37.586106 │\n",
"│ KEY ┆ 0.6825847 ┆ KeyCorp ┆ Financials ┆ 59.400882 ┆ 34.37163 │\n",
"│ FSLR ┆ 0.668008 ┆ First Solar ┆ Information Technology ┆ 30.190669 ┆ 5.161417 │\n",
"│ MPWR ┆ 0.654031 ┆ Monolithic Power Systems ┆ Information Technology ┆ 48.461755 ┆ 23.432502 │\n",
"│ … ┆ … ┆ … ┆ … ┆ … ┆ … │\n",
"│ DUK ┆ 0.148876 ┆ Duke Energy ┆ Utilities ┆ 22.819521 ┆ -2.209732 │\n",
"│ PPL ┆ 0.1465794 ┆ PPL Corporation ┆ Utilities ┆ 16.684222 ┆ -8.34503 │\n",
"│ ED ┆ 0.137104 ┆ Consolidated Edison ┆ Utilities ┆ 6.764106 ┆ -18.265146 │\n",
"│ DTE ┆ 0.125249 ┆ DTE Energy ┆ Utilities ┆ 12.616302 ┆ -12.41295 │\n",
"│ SO ┆ 0.11732 ┆ Southern Company ┆ Utilities ┆ 20.565807 ┆ -4.463445 │\n",
"└──────┴───────────┴──────────────────────────┴────────────────────────┴────────────┴─────────────┘"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"stock_info = DatabaseHandler().fetch_stock()\n",
"df = pl.read_csv(REPORT_DIR / f\"report_{DATE}.csv\")\n",
"df = df.join(stock_info, on=\"tic\", how=\"left\")\n",
"df = df.select(pl.col(\"tic\", \"score\", \"name\", \"sector\", \"freturn\", \"adj_freturn\"))\n",
"df"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"DATE 2023-06-01\n",
"\n",
"Top 15 stocks:\n",
"Average freturn: 33.63%\n",
"Average adj_freturn: 8.60%\n",
"\n",
"Bottom 15 stocks:\n",
"Average freturn: 7.26%\n",
"Average adj_freturn: -17.77%\n"
]
}
],
"source": [
"n = 15\n",
"top = df.head(n)\n",
"bottom = df.tail(n)\n",
"\n",
"# Calculate average returns for top stocks\n",
"top_freturn = top.select(pl.col(\"freturn\")).mean().item()\n",
"top_adj_freturn = top.select(pl.col(\"adj_freturn\")).mean().item()\n",
"\n",
"# Calculate average returns for bottom stocks\n",
"bottom_freturn = bottom.select(pl.col(\"freturn\")).mean().item()\n",
"bottom_adj_freturn = bottom.select(pl.col(\"adj_freturn\")).mean().item()\n",
"\n",
"print(f\"\\nDATE {DATE}\")\n",
"print(f\"\\nTop {n} stocks:\")\n",
"print(f\"Average freturn: {top_freturn:.2f}%\")\n",
"print(f\"Average adj_freturn: {top_adj_freturn:.2f}%\")\n",
"print(f\"\\nBottom {n} stocks:\")\n",
"print(f\"Average freturn: {bottom_freturn:.2f}%\")\n",
"print(f\"Average adj_freturn: {bottom_adj_freturn:.2f}%\")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": ".venv",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.0"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
16 changes: 8 additions & 8 deletions stocksense/main.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
from datetime import datetime

import click

from stocksense.config import config
Expand All @@ -18,8 +16,12 @@ def prepare_data():
@click.option("-u", "--update", is_flag=True, help="Update stock data.")
@click.option("-t", "--train", is_flag=True, help="Train model.")
@click.option("-s", "--score", is_flag=True, help="Score stocks.")
@click.option("-b", "--backtest", is_flag=True, help="Backtest model.")
def main(update, train, score, backtest):
@click.option(
"--trade-date",
type=click.DateTime(formats=["%Y-%m-%d"]),
help="Trade date for model operations (format: YYYY-MM-DD)",
)
def main(update, train, score, trade_date):
"""
CLI handling.
"""
Expand All @@ -29,16 +31,14 @@ def main(update, train, score, backtest):
etl_handler.update_index_listings()
etl_handler.extract()

if any([train, score, backtest]):
if any([train, score]):
data = prepare_data()
stocks = DatabaseHandler().fetch_sp500_stocks()
handler = ModelHandler(stocks)
handler = ModelHandler(stocks, trade_date)
if train:
handler.train(data)
if score:
handler.score(data)
if backtest:
handler.backtest(data, [datetime(2023, 9, 1)])


if __name__ == "__main__":
Expand Down
10 changes: 9 additions & 1 deletion stocksense/model/genetic_algorithm.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import numpy as np
import polars as pl
import pygad
from loguru import logger
Expand Down Expand Up @@ -97,6 +98,11 @@ def plot_fitness(self):
self.ga_instance.plot_fitness()


def top_k_precision(y_true, y_proba, k=100):
top_k_indices = np.argsort(y_proba)[-k:]
return np.mean(y_true[top_k_indices])


def get_train_val_splits(data: pl.DataFrame, stocks: list[str], min_train_years: int = 5):
"""
Generate training/validation splits using expanding window approach.
Expand Down Expand Up @@ -172,7 +178,9 @@ def fitness_function(ga_instance, solution, solution_idx):
y_val = val.select(target).to_pandas().values.ravel()

model.train(X_train, y_train)
perf = model.pr_auc(X_val, y_val)
y_proba = model.predict_proba(X_val)

perf = top_k_precision(y_val, y_proba, k=100)
perfs.append(perf)

return sum(perfs) / len(perfs)
Expand Down
4 changes: 4 additions & 0 deletions stocksense/model/xgboost_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,10 @@ def roc_auc(self, X_test, y_test):
y_proba = self.predict_proba(X_test)
return skm.roc_auc_score(y_test, y_proba)

def ndcg_score(self, X_test, y_test, k=None):
y_proba = self.predict_proba(X_test).reshape(1, -1)
return skm.ndcg_score(y_test.reshape(1, -1), y_proba, k=k)

def get_importance(self, importance_type="gain"):
importance = self.model.get_booster().get_score(importance_type=importance_type)
return sorted(importance.items(), key=lambda x: x[1], reverse=True)
Expand Down

0 comments on commit f9aad58

Please sign in to comment.