Skip to content

Commit

Permalink
Add testing framework to processing pipeline. Minor feature engineering corrections.
Browse files Browse the repository at this point in the history
  • Loading branch information
Francisco Silva committed Nov 17, 2024
1 parent 97e2285 commit d18fd49
Show file tree
Hide file tree
Showing 19 changed files with 443 additions and 151 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ htmlcov/
.coverage
.coverage.*
.cache
*.cache
nosetests.xml
coverage.xml
*.cover
Expand Down
46 changes: 23 additions & 23 deletions notebooks/db_analysis.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
},
{
"cell_type": "code",
"execution_count": 1,
"execution_count": 15,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -18,14 +18,14 @@
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": 18,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"\u001b[32m2024-11-08 14:40:46.110\u001b[0m | \u001b[32m\u001b[1mSUCCESS \u001b[0m | \u001b[36mdatabase_handler.schema\u001b[0m:\u001b[36mcreate_tables\u001b[0m:\u001b[36m116\u001b[0m - \u001b[32m\u001b[1mTables created successfully\u001b[0m\n"
"\u001b[32m2024-11-15 15:59:57.517\u001b[0m | \u001b[32m\u001b[1mSUCCESS \u001b[0m | \u001b[36mdatabase_handler.schema\u001b[0m:\u001b[36mcreate_tables\u001b[0m:\u001b[36m122\u001b[0m - \u001b[32m\u001b[1mTables created successfully\u001b[0m\n"
]
}
],
Expand All @@ -35,7 +35,7 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": 17,
"metadata": {},
"outputs": [
{
Expand All @@ -48,30 +48,30 @@
" white-space: pre-wrap;\n",
"}\n",
"</style>\n",
"<small>shape: (940, 6)</small><table border=\"1\" class=\"dataframe\"><thead><tr><th>tic</th><th>name</th><th>sector</th><th>last_update</th><th>spx_status</th><th>active</th></tr><tr><td>str</td><td>str</td><td>str</td><td>date</td><td>i64</td><td>i64</td></tr></thead><tbody><tr><td>&quot;ABI&quot;</td><td>null</td><td>&quot;Industrials&quot;</td><td>2005-01-01</td><td>0</td><td>0</td></tr><tr><td>&quot;ABKFQ&quot;</td><td>null</td><td>&quot;Financials&quot;</td><td>2005-01-01</td><td>0</td><td>0</td></tr><tr><td>&quot;ABS&quot;</td><td>null</td><td>&quot;Consumer Staples&quot;</td><td>2005-01-01</td><td>0</td><td>0</td></tr><tr><td>&quot;ACV&quot;</td><td>null</td><td>&quot;Consumer Staples&quot;</td><td>2005-01-01</td><td>0</td><td>0</td></tr><tr><td>&quot;ANRZQ&quot;</td><td>null</td><td>&quot;Materials&quot;</td><td>2005-01-01</td><td>0</td><td>0</td></tr><tr><td>&hellip;</td><td>&hellip;</td><td>&hellip;</td><td>&hellip;</td><td>&hellip;</td><td>&hellip;</td></tr><tr><td>&quot;ZBH&quot;</td><td>&quot;Zimmer Biomet&quot;</td><td>&quot;Health Care&quot;</td><td>2024-11-01</td><td>1</td><td>1</td></tr><tr><td>&quot;ZTS&quot;</td><td>&quot;Zoetis&quot;</td><td>&quot;Health Care&quot;</td><td>2024-11-01</td><td>1</td><td>1</td></tr><tr><td>&quot;ERIE&quot;</td><td>&quot;Erie Indemnity&quot;</td><td>&quot;Financials&quot;</td><td>2024-11-01</td><td>1</td><td>1</td></tr><tr><td>&quot;PLTR&quot;</td><td>&quot;Palantir Technologies&quot;</td><td>&quot;Information Technology&quot;</td><td>2024-11-01</td><td>1</td><td>1</td></tr><tr><td>&quot;SW&quot;</td><td>&quot;Smurfit WestRock&quot;</td><td>&quot;Materials&quot;</td><td>2024-11-01</td><td>1</td><td>1</td></tr></tbody></table></div>"
"<small>shape: (940, 6)</small><table border=\"1\" class=\"dataframe\"><thead><tr><th>tic</th><th>name</th><th>sector</th><th>last_update</th><th>spx_status</th><th>active</th></tr><tr><td>str</td><td>str</td><td>str</td><td>date</td><td>i64</td><td>i64</td></tr></thead><tbody><tr><td>&quot;ABI&quot;</td><td>null</td><td>&quot;Industrials&quot;</td><td>2005-01-01</td><td>0</td><td>0</td></tr><tr><td>&quot;ABKFQ&quot;</td><td>null</td><td>&quot;Financials&quot;</td><td>2005-01-01</td><td>0</td><td>0</td></tr><tr><td>&quot;ABS&quot;</td><td>null</td><td>&quot;Consumer Staples&quot;</td><td>2005-01-01</td><td>0</td><td>0</td></tr><tr><td>&quot;ACV&quot;</td><td>null</td><td>&quot;Consumer Staples&quot;</td><td>2005-01-01</td><td>0</td><td>0</td></tr><tr><td>&quot;ANRZQ&quot;</td><td>null</td><td>&quot;Materials&quot;</td><td>2005-01-01</td><td>0</td><td>0</td></tr><tr><td>&hellip;</td><td>&hellip;</td><td>&hellip;</td><td>&hellip;</td><td>&hellip;</td><td>&hellip;</td></tr><tr><td>&quot;SW&quot;</td><td>&quot;Smurfit WestRock&quot;</td><td>&quot;Materials&quot;</td><td>2024-11-01</td><td>1</td><td>1</td></tr><tr><td>&quot;DELL&quot;</td><td>null</td><td>&quot;Information Technology&quot;</td><td>2024-11-14</td><td>1</td><td>1</td></tr><tr><td>&quot;MMM&quot;</td><td>&quot;3M&quot;</td><td>&quot;Industrials&quot;</td><td>2024-11-14</td><td>1</td><td>1</td></tr><tr><td>&quot;AOS&quot;</td><td>&quot;A. O. Smith&quot;</td><td>&quot;Industrials&quot;</td><td>2024-11-14</td><td>1</td><td>1</td></tr><tr><td>&quot;ABT&quot;</td><td>&quot;Abbott&quot;</td><td>&quot;Health Care&quot;</td><td>2024-11-14</td><td>1</td><td>1</td></tr></tbody></table></div>"
],
"text/plain": [
"shape: (940, 6)\n",
"┌───────┬───────────────────────┬────────────────────────┬─────────────┬────────────┬────────┐\n",
"│ tic ┆ name ┆ sector ┆ last_update ┆ spx_status ┆ active │\n",
"│ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n",
"│ str ┆ str ┆ str ┆ date ┆ i64 ┆ i64 │\n",
"╞═══════╪═══════════════════════╪════════════════════════╪═════════════╪════════════╪════════╡\n",
"│ ABI ┆ null ┆ Industrials ┆ 2005-01-01 ┆ 0 ┆ 0 │\n",
"│ ABKFQ ┆ null ┆ Financials ┆ 2005-01-01 ┆ 0 ┆ 0 │\n",
"│ ABS ┆ null ┆ Consumer Staples ┆ 2005-01-01 ┆ 0 ┆ 0 │\n",
"│ ACV ┆ null ┆ Consumer Staples ┆ 2005-01-01 ┆ 0 ┆ 0 │\n",
"│ ANRZQ ┆ null ┆ Materials ┆ 2005-01-01 ┆ 0 ┆ 0 │\n",
"│ … ┆ … ┆ … ┆ … ┆ … ┆ … │\n",
"ZBH ┆ Zimmer Biomet ┆ Health Care ┆ 2024-11-01 ┆ 1 ┆ 1 │\n",
"ZTS ┆ Zoetis ┆ Health Care ┆ 2024-11-01 ┆ 1 ┆ 1 │\n",
"ERIE ┆ Erie Indemnity ┆ Financials ┆ 2024-11-01 ┆ 1 ┆ 1 │\n",
"PLTR ┆ Palantir Technologies ┆ Information Technology ┆ 2024-11-01 ┆ 1 ┆ 1 │\n",
"SW ┆ Smurfit WestRock ┆ Materials ┆ 2024-11-01 ┆ 1 ┆ 1 │\n",
"└───────┴───────────────────────┴────────────────────────┴─────────────┴────────────┴────────┘"
"┌───────┬──────────────────┬────────────────────────┬─────────────┬────────────┬────────┐\n",
"│ tic ┆ name ┆ sector ┆ last_update ┆ spx_status ┆ active │\n",
"│ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n",
"│ str ┆ str ┆ str ┆ date ┆ i64 ┆ i64 │\n",
"╞═══════╪══════════════════╪════════════════════════╪═════════════╪════════════╪════════╡\n",
"│ ABI ┆ null ┆ Industrials ┆ 2005-01-01 ┆ 0 ┆ 0 │\n",
"│ ABKFQ ┆ null ┆ Financials ┆ 2005-01-01 ┆ 0 ┆ 0 │\n",
"│ ABS ┆ null ┆ Consumer Staples ┆ 2005-01-01 ┆ 0 ┆ 0 │\n",
"│ ACV ┆ null ┆ Consumer Staples ┆ 2005-01-01 ┆ 0 ┆ 0 │\n",
"│ ANRZQ ┆ null ┆ Materials ┆ 2005-01-01 ┆ 0 ┆ 0 │\n",
"│ … ┆ … ┆ … ┆ … ┆ … ┆ … │\n",
"SW ┆ Smurfit WestRock ┆ Materials ┆ 2024-11-01 ┆ 1 ┆ 1 │\n",
"DELL ┆ null ┆ Information Technology ┆ 2024-11-14 ┆ 1 ┆ 1 │\n",
"MMM ┆ 3M ┆ Industrials ┆ 2024-11-14 ┆ 1 ┆ 1 │\n",
"AOS ┆ A. O. Smith ┆ Industrials ┆ 2024-11-14 ┆ 1 ┆ 1 │\n",
"ABT ┆ Abbott ┆ Health Care ┆ 2024-11-14 ┆ 1 ┆ 1 │\n",
"└───────┴──────────────────┴────────────────────────┴─────────────┴────────────┴────────┘"
]
},
"execution_count": 3,
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
Expand Down
177 changes: 177 additions & 0 deletions notebooks/mock_data.ipynb

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ dependencies = [
]

[project.optional-dependencies]
dev = ["pytest", "pytest-cov", "ruff", "nbqa"]
dev = ["pytest", "pytest-cov", "ruff", "nbqa", "pytest-mock"]

[build-system]
requires = ["setuptools"]
Expand Down
5 changes: 3 additions & 2 deletions stocksense/main.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import click
from model import ModelHandler
from pipeline import ETL, process_stock_data
from pipeline import ETL, clean, engineer_features


@click.command()
Expand All @@ -17,7 +17,8 @@ def main(update, train, score):
etl_handler.update_index_listings()
etl_handler.extract()
if train:
data = process_stock_data()
data = engineer_features()
data = clean(data)
handler = ModelHandler()
handler.train(data)
if score:
Expand Down
3 changes: 2 additions & 1 deletion stocksense/model/model_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,8 @@ class ModelHandler:
def __init__(self):
self.id_col = config.model.id_col
self.date_col = config.model.date_col
self.target_col = config.model.target
self.features = config.model.features
self.target = config.model.target
self.train_start = config.model.train_start
self.train_window = config.model.train_window
self.val_window = config.model.val_window
Expand Down
4 changes: 2 additions & 2 deletions stocksense/pipeline/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from .etl import ETL
from .preprocess import process_stock_data
from .preprocess import clean, engineer_features
from .scraper import Scraper

__all__ = ["Scraper", "ETL", "process_stock_data"]
__all__ = ["Scraper", "ETL", "engineer_features", "clean"]
Loading

0 comments on commit d18fd49

Please sign in to comment.