Skip to content

Commit

Permalink
Refactor path handling, changing it to a centralized config file.
Browse files Browse the repository at this point in the history
  • Loading branch information
Francisco Silva committed Feb 20, 2025
1 parent 4839cb6 commit 1c705bc
Show file tree
Hide file tree
Showing 10 changed files with 102 additions and 41 deletions.
12 changes: 6 additions & 6 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -158,12 +158,12 @@ cython_debug/
.vscode/

# Data
**/data/cache/*
**/data/raw/*
**/data/interim/*
**/data/processed/*
**/data/database/*
**/data/tests/*
data/cache/*
data/raw/*
data/interim/*
data/processed/*
data/database/*
data/tests/*
old/
log/

Expand Down
10 changes: 3 additions & 7 deletions stocksense/app/pages/insights.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,12 @@
import datetime as dt
from pathlib import Path

import pandas as pd
import plotly.express as px
import streamlit as st

from stocksense.config import PORTFOLIO_DIR
from stocksense.database import DatabaseHandler

REPORTS_DIR = Path(__file__).parents[3] / "reports"
SCORES_DIR = REPORTS_DIR / "scores"
PORTFOLIOS_DIR = REPORTS_DIR / "portfolios"


@st.cache_data(show_spinner="Loading stock data...", max_entries=10)
def load_stock_data():
Expand All @@ -22,7 +18,7 @@ def get_available_portfolios():
"""
Get all available portfolio files.
"""
portfolio_files = list(PORTFOLIOS_DIR.glob("portfolio_*.xlsx"))
portfolio_files = list(PORTFOLIO_DIR.glob("portfolio_*.xlsx"))
dates = [dt.datetime.strptime(f.stem.split("_")[1], "%Y-%m-%d").date() for f in portfolio_files]
return sorted(dates, reverse=True)

Expand All @@ -31,7 +27,7 @@ def load_portfolio(trade_date):
"""
Load portfolio for a specific trade date.
"""
portfolio_file = PORTFOLIOS_DIR / f"portfolio_{trade_date}.xlsx"
portfolio_file = PORTFOLIO_DIR / f"portfolio_{trade_date}.xlsx"
if not portfolio_file.exists():
st.error(f"No portfolio found for trade date {trade_date}")
return None
Expand Down
36 changes: 35 additions & 1 deletion stocksense/config/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,39 @@
from .manager import ConfigManager
from .paths import (
CACHE_DIR,
DATA_DIR,
DATABASE_DIR,
DATABASE_PATH,
FIXTURES_DIR,
INTERIM_DATA_DIR,
MODEL_DIR,
PACKAGE_DIR,
PORTFOLIO_DIR,
PROCESSED_DATA_DIR,
RAW_DATA_DIR,
REPORTS_DIR,
ROOT_DIR,
SCORES_DIR,
TEST_DIR,
)

config = ConfigManager()

__all__ = ["ROOT_PATH", "config"]
__all__ = [
"config",
"CACHE_DIR",
"DATA_DIR",
"DATABASE_DIR",
"DATABASE_PATH",
"FIXTURES_DIR",
"INTERIM_DATA_DIR",
"MODEL_DIR",
"PACKAGE_DIR",
"PORTFOLIO_DIR",
"PROCESSED_DATA_DIR",
"RAW_DATA_DIR",
"REPORTS_DIR",
"ROOT_DIR",
"SCORES_DIR",
"TEST_DIR",
]
44 changes: 44 additions & 0 deletions stocksense/config/paths.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
"""
Centralized filesystem locations for the stocksense project.

Every constant below is a ``pathlib.Path`` derived from ``ROOT_DIR``, which is
resolved relative to this file. Importing this module has a side effect: each
directory listed in ``REQUIRED_DIRS`` is created if it does not already exist.
"""

from pathlib import Path

# Base paths
# parents[0] is the directory holding this file, so parents[2] is two levels
# above it; every other path in this module hangs off that directory.
ROOT_DIR = Path(__file__).parents[2]
PACKAGE_DIR = ROOT_DIR / "stocksense"

# Data paths
DATA_DIR = ROOT_DIR / "data"
RAW_DATA_DIR = DATA_DIR / "raw"
INTERIM_DATA_DIR = DATA_DIR / "interim"
PROCESSED_DATA_DIR = DATA_DIR / "processed"
CACHE_DIR = DATA_DIR / "cache"

# Database paths
DATABASE_DIR = DATA_DIR / "database"
DATABASE_PATH = DATABASE_DIR / "stock_db.db"

# Model paths
MODEL_DIR = ROOT_DIR / "models"

# Report paths
REPORTS_DIR = ROOT_DIR / "reports"
SCORES_DIR = REPORTS_DIR / "scores"
PORTFOLIO_DIR = REPORTS_DIR / "portfolios"

# Test paths
TEST_DIR = ROOT_DIR / "tests"
FIXTURES_DIR = TEST_DIR / "fixtures"

# Ensure required directories exist
# Only leaf directories are listed: DATA_DIR, REPORTS_DIR and TEST_DIR are
# created implicitly because mkdir() is called with parents=True.
REQUIRED_DIRS = [
    RAW_DATA_DIR,
    INTERIM_DATA_DIR,
    PROCESSED_DATA_DIR,
    CACHE_DIR,
    DATABASE_DIR,
    MODEL_DIR,
    SCORES_DIR,
    PORTFOLIO_DIR,
    FIXTURES_DIR,
]

# Runs at import time; exist_ok=True makes repeated imports a no-op for
# directories that are already present.
for directory in REQUIRED_DIRS:
    directory.mkdir(parents=True, exist_ok=True)
4 changes: 1 addition & 3 deletions stocksense/database/connection.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,9 @@
import sqlite3
from pathlib import Path
from sqlite3 import Error

from loguru import logger

PACKAGE_DIR = Path(__file__).parents[1]
DATABASE_PATH = PACKAGE_DIR / "data/database/stock_db.db"
from stocksense.config import DATABASE_PATH


class DatabaseConnection:
Expand Down
7 changes: 6 additions & 1 deletion stocksense/model/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,9 @@
from .portfolio import PortfolioBuilder
from .xgboost_model import XGBoostClassifier, XGBoostRegressor

__all__ = ["XGBoostRegressor", "XGBoostClassifier", "ModelHandler", "PortfolioBuilder"]
__all__ = [
"ModelHandler",
"PortfolioBuilder",
"XGBoostClassifier",
"XGBoostRegressor",
]
8 changes: 2 additions & 6 deletions stocksense/model/model_handler.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,12 @@
import datetime as dt
import warnings
from pathlib import Path
from typing import List, Optional

import numpy as np
import polars as pl
from loguru import logger

from stocksense.config import config
from stocksense.config import MODEL_DIR, SCORES_DIR, config

from .optuna_optimizer import OptunaOptimizer
from .utils import (
Expand All @@ -18,9 +17,6 @@
)
from .xgboost_model import XGBoostClassifier

MODEL_DIR = Path(__file__).parents[1] / "model" / "model_base"
REPORT_DIR = Path(__file__).parents[2] / "reports" / "scores"

warnings.filterwarnings("ignore")


Expand Down Expand Up @@ -186,7 +182,7 @@ def _save_scoring_report(self, rank_data: pl.DataFrame) -> None:
DataFrame containing ranks for each target and average rank.
"""
try:
report_file = REPORT_DIR / f"scores_{self.trade_date.date()}.csv"
report_file = SCORES_DIR / f"scores_{self.trade_date.date()}.csv"
rank_data.write_csv(report_file)
logger.success(f"SAVED scoring report to {report_file}")
except Exception as e:
Expand Down
5 changes: 2 additions & 3 deletions stocksense/model/portfolio.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
import datetime as dt
from pathlib import Path

import numpy as np
import pandas as pd
import polars as pl
from loguru import logger

from stocksense.config import PORTFOLIO_DIR
from stocksense.database import DatabaseHandler


Expand All @@ -30,7 +30,6 @@ def __init__(self, weighting: str = "market_cap"):
"""
self.weighting = weighting
self.db = DatabaseHandler()
self.portfolios_dir = Path(__file__).parents[2] / "reports" / "portfolios"

def build_portfolio(
self, n_stocks: int, trade_date: dt.datetime, data: pl.DataFrame
Expand Down Expand Up @@ -193,7 +192,7 @@ def _save_portfolio_excel(self, portfolio: pl.DataFrame, trade_date: dt.datetime
trade_date : dt.datetime
Trade date.
"""
excel_path = self.portfolios_dir / f"portfolio_{trade_date.date()}.xlsx"
excel_path = PORTFOLIO_DIR / f"portfolio_{trade_date.date()}.xlsx"

# Convert to pandas once and rename columns
portfolio_pd = portfolio.rename({
Expand Down
13 changes: 3 additions & 10 deletions stocksense/pipeline/etl.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,14 +7,11 @@
from loguru import logger
from tqdm import tqdm

from stocksense.config import ConfigManager
from stocksense.config import DATA_DIR, ConfigManager
from stocksense.database import DatabaseHandler

from .scraper import Scraper

PACKAGE_DIR = Path(__file__).parents[1]
DATA_PATH = PACKAGE_DIR / "data"


class ETL:
"""
Expand All @@ -28,7 +25,7 @@ def __init__(self, config: ConfigManager, stocks: Optional[list[str]] = None):
self.db_schema: dict = config.database.db_schema
self.base_date: str = config.scraping.base_date
self.fin_source: str = "yfinance"
self.historical_data_path: Path = DATA_PATH / "interim"
self.historical_data_path: Path = DATA_DIR / "interim"
self._update_index_listings()
self.stocks: list[str] = stocks or self._set_default_stocks()

Expand Down Expand Up @@ -358,7 +355,7 @@ def restore_delisted_stocks_data(self) -> None:
)

logger.info(f"Restoring market data for {delisted_stocks}")
prices_file = DATA_PATH / "raw" / "prices_2005-01-01_2018-12-31.csv"
prices_file = DATA_DIR / "raw" / "prices_2005-01-01_2018-12-31.csv"

with open(prices_file) as f:
data_types = f.readline().strip().split(',')[1:]
Expand Down Expand Up @@ -411,11 +408,7 @@ def ingest_all_historical_data(self):
"""
Ingest historical stock data stored in .csv files.
"""

# read snapshot of S&P500 constituents and store in stocks info table
self._ingest_stock_list()

# iterate over stock historical and ingest it
base_folder = self.historical_data_path / "company_data"
for stock_folder in os.listdir(base_folder):
stock_path = base_folder / stock_folder
Expand Down
4 changes: 0 additions & 4 deletions stocksense/pipeline/preprocess.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
import datetime as dt
from pathlib import Path

import numpy as np
import polars as pl
Expand All @@ -9,9 +8,6 @@
from stocksense.config import config
from stocksense.database import DatabaseHandler

DATA_PATH = Path(__file__).parents[1] / "data"
FIXTURE_PATH = Path(__file__).parents[2] / "tests" / "fixtures"


def engineer_features() -> pl.DataFrame:
"""
Expand Down

0 comments on commit 1c705bc

Please sign in to comment.