From b10d6f207e4a0350ea7cb9d22af018990ef4085f Mon Sep 17 00:00:00 2001 From: Andrew Schell Date: Fri, 24 Jan 2025 00:19:14 -0800 Subject: [PATCH 1/2] moved files from root/db to root/data --- dataframes.py | 2 +- main.py | 4 ++-- parse.py | 9 ++++----- requirements.txt | 15 +++++++++++++++ src/fetch.py | 6 +++--- 5 files changed, 25 insertions(+), 11 deletions(-) diff --git a/dataframes.py b/dataframes.py index 3b1df14..33f9736 100644 --- a/dataframes.py +++ b/dataframes.py @@ -9,7 +9,7 @@ class DataFrames(): - db_file = 'db/mstables.sqlite' # Standard db file name + db_file = 'data/mstables.sqlite' # Standard db file name def __init__(self, file = db_file): diff --git a/main.py b/main.py index 52ac458..79273f4 100755 --- a/main.py +++ b/main.py @@ -1,9 +1,9 @@ #!/usr/bin/env python from shutil import copyfile -from datetime import datetime from importlib import reload -import fetch, time, os, re, sqlite3 +import time, os, re +from src import fetch __author__ = "Caio Brandao" __copyright__ = "Copyright 2019+, Caio Brandao" diff --git a/parse.py b/parse.py index a6225d0..5ebfc1e 100644 --- a/parse.py +++ b/parse.py @@ -1,10 +1,9 @@ from bs4 import BeautifulSoup as bs -from importlib import reload #Comment out once done using import datetime as DT from io import StringIO import pandas as pd -import numpy as np -import fetch, sqlite3, time, json, zlib, csv, sys, re +import sqlite3, time, json, zlib, re +from src import fetch # Manage database connection and fetch data to be parsed @@ -277,7 +276,7 @@ def parse_2(cur, ticker_id, exch_id, data): # Update Tickers table with parsed data sql = fetch.sql_update_record('Master', {'industry_id':industry_id, 'stock_type_id':stype_id, 'fyend_id':fyend_id, 'style_id':style_id}, - {'ticker_id':ticker_id, 'exchange_id':exch_id}) + {'ticker_id':ticker_id, 'exchange_id':exch_id}) fetch.db_execute(cur, sql) return 200 @@ -831,7 +830,7 @@ def parse_9(cur, ticker_id, exch_id, data): info['ticker_id'] = ticker_id info['exchange_id'] = exch_id sql = fetch.sql_insert('InsiderTransactions', - tuple(info.keys()), tuple(info.values())) + tuple(info.keys()), tuple(info.values())) fetch.db_execute(cur, sql) return 200 diff --git a/requirements.txt b/requirements.txt index e69de29..e58423b 100644 --- a/requirements.txt +++ b/requirements.txt @@ -0,0 +1,15 @@ +zlib~=1.2.13 +fetch +time +os +re +numpy +requests +pandas +sqlite3 +sqlalchemy +requests +json +zlib +parse +bs4 \ No newline at end of file diff --git a/src/fetch.py b/src/fetch.py index f298063..7cb7315 100644 --- a/src/fetch.py +++ b/src/fetch.py @@ -100,17 +100,17 @@ def get_ticker(u, typ): # Insert list of countries into Countries table sql = '''INSERT OR IGNORE INTO Countries (country, a2_iso, a3_un) VALUES (?, ?, ?)''' - cur.executemany(sql, csv_content('input/ctycodes.csv', 3)) + cur.executemany(sql, csv_content('../input/ctycodes.csv', 3)) # Insert list of currencies into Currencies table sql = '''INSERT OR IGNORE INTO Currencies (currency, currency_code) VALUES (?, ?)''' - cur.executemany(sql, csv_content('input/symbols.csv', 2)) + cur.executemany(sql, csv_content('../input/symbols.csv', 2)) # Insert list of types into SecurityTypes table sql = '''INSERT OR IGNORE INTO SecurityTypes (security_type_code, security_type) VALUES (?, ?)''' - cur.executemany(sql, csv_content('input/ms_investment-types.csv', 2)) + cur.executemany(sql, csv_content('../input/ms_investment-types.csv', 2)) # Insert list of api URLs into URLs table for k, v in apis.items(): From e58c81609d2c15fce04d8edf3f5ced2e9dac9774 Mon Sep 17 00:00:00 2001 From: Andrew Schell Date: Fri, 24 Jan 2025 00:19:14 -0800 Subject: [PATCH 2/2] moved dataframe.py to src with parse and requirements.txt --- main.py | 4 +- .../data_overview.ipynb | 368 +++++++++--------- requirements.txt | 15 + dataframes.py => src/dataframes.py | 2 +- src/fetch.py | 6 +- parse.py => src/parse.py | 9 +- 6 files changed, 199 insertions(+), 205 deletions(-) rename data_overview.ipynb => notebooks/data_overview.ipynb (97%) rename dataframes.py => src/dataframes.py (99%) rename parse.py => src/parse.py (99%) diff --git a/main.py b/main.py index 52ac458..79273f4 100755 --- a/main.py +++ b/main.py @@ -1,9 +1,9 @@ #!/usr/bin/env python from shutil import copyfile -from datetime import datetime from importlib import reload -import fetch, time, os, re, sqlite3 +import time, os, re +from src import fetch __author__ = "Caio Brandao" __copyright__ = "Copyright 2019+, Caio Brandao" diff --git a/data_overview.ipynb b/notebooks/data_overview.ipynb similarity index 97% rename from data_overview.ipynb rename to notebooks/data_overview.ipynb index c0f82fe..33d8bf6 100644 --- a/data_overview.ipynb +++ b/notebooks/data_overview.ipynb @@ -55,10 +55,113 @@ ] }, { + "metadata": { + "ExecuteTime": { + "end_time": "2025-01-24T08:25:18.844736Z", + "start_time": "2025-01-24T08:24:46.251179Z" + } + }, + "cell_type": "code", + "source": [ + "!pip3 install matplotlib\n", + "!pip3 install pandas\n", + "!pip3 install numpy\n", + "!pip3 install dataframes #module containing class used to create DataFrame objects from SQLite database file\n", + "!pip3 install datetime\n", + "\n", + "!pip3 install bs4\n", + "!pip3 install requests" + ], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Looking in indexes: https://pypi.org/simple, https://pypi.ngc.nvidia.com\r\n", + "\u001B[31mERROR: Could not find a version that satisfies the requirement matplotlib.pyplot (from versions: none)\u001B[0m\u001B[31m\r\n", + "\u001B[0m\u001B[31mERROR: No matching distribution found for matplotlib.pyplot\u001B[0m\u001B[31m\r\n", + "\u001B[0mLooking in indexes: https://pypi.org/simple, https://pypi.ngc.nvidia.com\r\n", + "Collecting matplotlib\r\n", + " Downloading matplotlib-3.7.5-cp38-cp38-macosx_10_12_x86_64.whl.metadata (5.7 kB)\r\n", + "Collecting contourpy>=1.0.1 (from matplotlib)\r\n", + " Downloading contourpy-1.1.1-cp38-cp38-macosx_10_9_x86_64.whl.metadata (5.9 kB)\r\n", + "Collecting cycler>=0.10 (from matplotlib)\r\n", + " Downloading cycler-0.12.1-py3-none-any.whl.metadata (3.8 kB)\r\n", + "Collecting fonttools>=4.22.0 (from matplotlib)\r\n", + " Downloading fonttools-4.55.5-cp38-cp38-macosx_10_9_x86_64.whl.metadata (165 kB)\r\n", + "Collecting kiwisolver>=1.0.1 (from matplotlib)\r\n", + " Downloading kiwisolver-1.4.7-cp38-cp38-macosx_10_9_x86_64.whl.metadata (6.3 kB)\r\n", + "Requirement already satisfied: numpy<2,>=1.20 in /opt/anaconda3/envs/mstables/lib/python3.8/site-packages (from matplotlib) (1.24.3)\r\n", + "Requirement already satisfied: packaging>=20.0 in /opt/anaconda3/envs/mstables/lib/python3.8/site-packages (from matplotlib) (24.1)\r\n", + "Collecting pillow>=6.2.0 (from matplotlib)\r\n", + " Downloading pillow-10.4.0-cp38-cp38-macosx_10_10_x86_64.whl.metadata (9.2 kB)\r\n", + "Collecting pyparsing>=2.3.1 (from matplotlib)\r\n", + " Downloading pyparsing-3.1.4-py3-none-any.whl.metadata (5.1 kB)\r\n", + "Requirement already satisfied: python-dateutil>=2.7 in /opt/anaconda3/envs/mstables/lib/python3.8/site-packages (from matplotlib) (2.9.0.post0)\r\n", + "Requirement already satisfied: importlib-resources>=3.2.0 in /opt/anaconda3/envs/mstables/lib/python3.8/site-packages (from matplotlib) (6.4.0)\r\n", + "Requirement already satisfied: zipp>=3.1.0 in /opt/anaconda3/envs/mstables/lib/python3.8/site-packages (from importlib-resources>=3.2.0->matplotlib) (3.20.2)\r\n", + "Requirement already satisfied: six>=1.5 in /opt/anaconda3/envs/mstables/lib/python3.8/site-packages (from python-dateutil>=2.7->matplotlib) (1.16.0)\r\n", + "Downloading matplotlib-3.7.5-cp38-cp38-macosx_10_12_x86_64.whl (7.4 MB)\r\n", + "\u001B[2K \u001B[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001B[0m \u001B[32m7.4/7.4 MB\u001B[0m \u001B[31m28.6 MB/s\u001B[0m eta \u001B[36m0:00:00\u001B[0ma \u001B[36m0:00:01\u001B[0m\r\n", + "\u001B[?25hDownloading contourpy-1.1.1-cp38-cp38-macosx_10_9_x86_64.whl (247 kB)\r\n", + "Downloading cycler-0.12.1-py3-none-any.whl (8.3 kB)\r\n", + "Downloading fonttools-4.55.5-cp38-cp38-macosx_10_9_x86_64.whl (2.3 MB)\r\n", + "\u001B[2K \u001B[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001B[0m \u001B[32m2.3/2.3 MB\u001B[0m \u001B[31m32.1 MB/s\u001B[0m eta \u001B[36m0:00:00\u001B[0m\r\n", + "\u001B[?25hDownloading kiwisolver-1.4.7-cp38-cp38-macosx_10_9_x86_64.whl (65 kB)\r\n", + "Downloading pillow-10.4.0-cp38-cp38-macosx_10_10_x86_64.whl (3.5 MB)\r\n", + "\u001B[2K \u001B[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001B[0m \u001B[32m3.5/3.5 MB\u001B[0m \u001B[31m38.2 MB/s\u001B[0m eta \u001B[36m0:00:00\u001B[0m\r\n", + "\u001B[?25hDownloading pyparsing-3.1.4-py3-none-any.whl (104 kB)\r\n", + "Installing collected packages: pyparsing, pillow, kiwisolver, fonttools, cycler, contourpy, matplotlib\r\n", + "Successfully installed contourpy-1.1.1 cycler-0.12.1 fonttools-4.55.5 kiwisolver-1.4.7 matplotlib-3.7.5 pillow-10.4.0 pyparsing-3.1.4\r\n", + "Looking in indexes: https://pypi.org/simple, https://pypi.ngc.nvidia.com\r\n", + "Requirement already satisfied: pandas in /opt/anaconda3/envs/mstables/lib/python3.8/site-packages (2.0.3)\r\n", + "Requirement already satisfied: python-dateutil>=2.8.2 in /opt/anaconda3/envs/mstables/lib/python3.8/site-packages (from pandas) (2.9.0.post0)\r\n", + "Requirement already satisfied: pytz>=2020.1 in /opt/anaconda3/envs/mstables/lib/python3.8/site-packages (from pandas) (2024.1)\r\n", + "Requirement already satisfied: tzdata>=2022.1 in /opt/anaconda3/envs/mstables/lib/python3.8/site-packages (from pandas) (2023.3)\r\n", + "Requirement already satisfied: numpy>=1.20.3 in /opt/anaconda3/envs/mstables/lib/python3.8/site-packages (from pandas) (1.24.3)\r\n", + "Requirement already satisfied: six>=1.5 in /opt/anaconda3/envs/mstables/lib/python3.8/site-packages (from python-dateutil>=2.8.2->pandas) (1.16.0)\r\n", + "Looking in indexes: https://pypi.org/simple, https://pypi.ngc.nvidia.com\r\n", + "Requirement already satisfied: numpy in /opt/anaconda3/envs/mstables/lib/python3.8/site-packages (1.24.3)\r\n", + "Looking in indexes: https://pypi.org/simple, https://pypi.ngc.nvidia.com\r\n", + "\u001B[31mERROR: Could not find a version that satisfies the requirement dataframes (from versions: none)\u001B[0m\u001B[31m\r\n", + "\u001B[0m\u001B[31mERROR: No matching distribution found for dataframes\u001B[0m\u001B[31m\r\n", + "\u001B[0mLooking in indexes: https://pypi.org/simple, https://pypi.ngc.nvidia.com\r\n", + "Collecting datetime\r\n", + " Downloading DateTime-5.5-py3-none-any.whl.metadata (33 kB)\r\n", + "Collecting zope.interface (from datetime)\r\n", + " Downloading zope.interface-7.2-cp38-cp38-macosx_10_9_x86_64.whl.metadata (44 kB)\r\n", + "Requirement already satisfied: pytz in /opt/anaconda3/envs/mstables/lib/python3.8/site-packages (from datetime) (2024.1)\r\n", + "Requirement already satisfied: setuptools in /opt/anaconda3/envs/mstables/lib/python3.8/site-packages (from zope.interface->datetime) (75.1.0)\r\n", + "Downloading DateTime-5.5-py3-none-any.whl (52 kB)\r\n", + "Downloading zope.interface-7.2-cp38-cp38-macosx_10_9_x86_64.whl (208 kB)\r\n", + "Installing collected packages: zope.interface, datetime\r\n", + "Successfully installed datetime-5.5 zope.interface-7.2\r\n", + "Looking in indexes: https://pypi.org/simple, https://pypi.ngc.nvidia.com\r\n", + "\u001B[31mERROR: Could not find a version that satisfies the requirement re (from versions: none)\u001B[0m\u001B[31m\r\n", + "\u001B[0m\u001B[31mERROR: No matching distribution found for re\u001B[0m\u001B[31m\r\n", + "\u001B[0mLooking in indexes: https://pypi.org/simple, https://pypi.ngc.nvidia.com\r\n", + "Requirement already satisfied: bs4 in /opt/anaconda3/envs/mstables/lib/python3.8/site-packages (0.0.2)\r\n", + "Requirement already satisfied: beautifulsoup4 in /opt/anaconda3/envs/mstables/lib/python3.8/site-packages (from bs4) (4.12.3)\r\n", + "Requirement already satisfied: soupsieve>1.2 in /opt/anaconda3/envs/mstables/lib/python3.8/site-packages (from beautifulsoup4->bs4) (2.5)\r\n", + "Looking in indexes: https://pypi.org/simple, https://pypi.ngc.nvidia.com\r\n", + "Requirement already satisfied: requests in /opt/anaconda3/envs/mstables/lib/python3.8/site-packages (2.32.3)\r\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in /opt/anaconda3/envs/mstables/lib/python3.8/site-packages (from requests) (3.3.2)\r\n", + "Requirement already satisfied: idna<4,>=2.5 in /opt/anaconda3/envs/mstables/lib/python3.8/site-packages (from requests) (3.7)\r\n", + "Requirement already satisfied: urllib3<3,>=1.21.1 in /opt/anaconda3/envs/mstables/lib/python3.8/site-packages (from requests) (2.2.3)\r\n", + "Requirement already satisfied: certifi>=2017.4.17 in /opt/anaconda3/envs/mstables/lib/python3.8/site-packages (from requests) (2024.8.30)\r\n" + ] + } + ], + "execution_count": 5 + }, + { + "metadata": { + "ExecuteTime": { + "end_time": "2025-01-24T08:25:30.456404Z", + "start_time": "2025-01-24T08:25:18.972868Z" + } + }, "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [], "source": [ "%matplotlib notebook\n", "\n", @@ -73,7 +176,9 @@ "# Reload in case changes have been made to module file\n", "from importlib import reload\n", "reload(dataframes);" - ] + ], + "outputs": [], + "execution_count": 6 }, { "cell_type": "markdown", @@ -87,14 +192,22 @@ }, { "cell_type": "code", - "execution_count": 2, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2025-01-24T08:25:31.679968Z", + "start_time": "2025-01-24T08:25:30.475838Z" + } + }, + "source": [ + "db_file_name = 'mstables' # SQLite database file that contains the data to be analyzed\n", + "df = dataframes.DataFrames('data/{}.sqlite'.format(db_file_name))" + ], "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Creating initial DataFrames objects from file db/mstables.sqlite...\n", + "Creating initial DataFrames objects from file data/mstables.sqlite...\n", "\n", "\t- DataFrame 'df.colheaders' ...\n", "\t- DataFrame 'df.timerefs' ...\n", @@ -115,10 +228,7 @@ ] } ], - "source": [ - "db_file_name = 'mstables' # SQLite database file that contains the data to be analyzed\n", - "df = dataframes.DataFrames('db/{}.sqlite'.format(db_file_name))" - ] + "execution_count": 7 }, { "cell_type": "markdown", @@ -132,12 +242,17 @@ }, { "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [], + "metadata": { + "ExecuteTime": { + "end_time": "2025-01-24T08:25:31.697900Z", + "start_time": "2025-01-24T08:25:31.693201Z" + } + }, "source": [ "df_master = df.master.copy()" - ] + ], + "outputs": [], + "execution_count": 8 }, { "cell_type": "markdown", @@ -155,16 +270,36 @@ }, { "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [], + "metadata": { + "ExecuteTime": { + "end_time": "2025-01-24T08:25:33.494545Z", + "start_time": "2025-01-24T08:25:31.713543Z" + } + }, "source": [ "cutoff_days = 10\n", "df_updated_ct = df_master[['update_date', 'ticker']].groupby('update_date').count().sort_index()\n", "cutoff_date = df_updated_ct[df_updated_ct['ticker'] > 100].index[0] - DT.timedelta(days=cutoff_days)\n", "\n", "df_master = df_master.where(df_master['lastdate'] >= cutoff_date).dropna(axis=0, how='all')" - ] + ], + "outputs": [ + { + "ename": "IndexError", + "evalue": "index 0 is out of bounds for axis 0 with size 0", + "output_type": "error", + "traceback": [ + "\u001B[0;31m---------------------------------------------------------------------------\u001B[0m", + "\u001B[0;31mIndexError\u001B[0m Traceback (most recent call last)", + "Cell \u001B[0;32mIn[9], line 3\u001B[0m\n\u001B[1;32m 1\u001B[0m cutoff_days \u001B[38;5;241m=\u001B[39m \u001B[38;5;241m10\u001B[39m\n\u001B[1;32m 2\u001B[0m df_updated_ct \u001B[38;5;241m=\u001B[39m df_master[[\u001B[38;5;124m'\u001B[39m\u001B[38;5;124mupdate_date\u001B[39m\u001B[38;5;124m'\u001B[39m, \u001B[38;5;124m'\u001B[39m\u001B[38;5;124mticker\u001B[39m\u001B[38;5;124m'\u001B[39m]]\u001B[38;5;241m.\u001B[39mgroupby(\u001B[38;5;124m'\u001B[39m\u001B[38;5;124mupdate_date\u001B[39m\u001B[38;5;124m'\u001B[39m)\u001B[38;5;241m.\u001B[39mcount()\u001B[38;5;241m.\u001B[39msort_index()\n\u001B[0;32m----> 3\u001B[0m cutoff_date \u001B[38;5;241m=\u001B[39m \u001B[43mdf_updated_ct\u001B[49m\u001B[43m[\u001B[49m\u001B[43mdf_updated_ct\u001B[49m\u001B[43m[\u001B[49m\u001B[38;5;124;43m'\u001B[39;49m\u001B[38;5;124;43mticker\u001B[39;49m\u001B[38;5;124;43m'\u001B[39;49m\u001B[43m]\u001B[49m\u001B[43m \u001B[49m\u001B[38;5;241;43m>\u001B[39;49m\u001B[43m \u001B[49m\u001B[38;5;241;43m100\u001B[39;49m\u001B[43m]\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mindex\u001B[49m\u001B[43m[\u001B[49m\u001B[38;5;241;43m0\u001B[39;49m\u001B[43m]\u001B[49m \u001B[38;5;241m-\u001B[39m DT\u001B[38;5;241m.\u001B[39mtimedelta(days\u001B[38;5;241m=\u001B[39mcutoff_days)\n\u001B[1;32m 5\u001B[0m df_master \u001B[38;5;241m=\u001B[39m df_master\u001B[38;5;241m.\u001B[39mwhere(df_master[\u001B[38;5;124m'\u001B[39m\u001B[38;5;124mlastdate\u001B[39m\u001B[38;5;124m'\u001B[39m] \u001B[38;5;241m>\u001B[39m\u001B[38;5;241m=\u001B[39m cutoff_date)\u001B[38;5;241m.\u001B[39mdropna(axis\u001B[38;5;241m=\u001B[39m\u001B[38;5;241m0\u001B[39m, how\u001B[38;5;241m=\u001B[39m\u001B[38;5;124m'\u001B[39m\u001B[38;5;124mall\u001B[39m\u001B[38;5;124m'\u001B[39m)\n", + "File \u001B[0;32m/opt/anaconda3/envs/mstables/lib/python3.8/site-packages/pandas/core/indexes/base.py:5175\u001B[0m, in \u001B[0;36mIndex.__getitem__\u001B[0;34m(self, key)\u001B[0m\n\u001B[1;32m 5172\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m is_integer(key) \u001B[38;5;129;01mor\u001B[39;00m is_float(key):\n\u001B[1;32m 5173\u001B[0m \u001B[38;5;66;03m# GH#44051 exclude bool, which would return a 2d ndarray\u001B[39;00m\n\u001B[1;32m 5174\u001B[0m key \u001B[38;5;241m=\u001B[39m com\u001B[38;5;241m.\u001B[39mcast_scalar_indexer(key)\n\u001B[0;32m-> 5175\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m \u001B[43mgetitem\u001B[49m\u001B[43m(\u001B[49m\u001B[43mkey\u001B[49m\u001B[43m)\u001B[49m\n\u001B[1;32m 5177\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m \u001B[38;5;28misinstance\u001B[39m(key, \u001B[38;5;28mslice\u001B[39m):\n\u001B[1;32m 5178\u001B[0m \u001B[38;5;66;03m# This case is separated from the conditional above to avoid\u001B[39;00m\n\u001B[1;32m 5179\u001B[0m \u001B[38;5;66;03m# pessimization com.is_bool_indexer and ndim checks.\u001B[39;00m\n\u001B[1;32m 5180\u001B[0m result \u001B[38;5;241m=\u001B[39m getitem(key)\n", + "File \u001B[0;32m/opt/anaconda3/envs/mstables/lib/python3.8/site-packages/pandas/core/arrays/datetimelike.py:370\u001B[0m, in \u001B[0;36mDatetimeLikeArrayMixin.__getitem__\u001B[0;34m(self, key)\u001B[0m\n\u001B[1;32m 362\u001B[0m \u001B[38;5;250m\u001B[39m\u001B[38;5;124;03m\"\"\"\u001B[39;00m\n\u001B[1;32m 363\u001B[0m \u001B[38;5;124;03mThis getitem defers to the underlying array, which by-definition can\u001B[39;00m\n\u001B[1;32m 364\u001B[0m \u001B[38;5;124;03monly handle list-likes, slices, and integer scalars\u001B[39;00m\n\u001B[1;32m 365\u001B[0m \u001B[38;5;124;03m\"\"\"\u001B[39;00m\n\u001B[1;32m 366\u001B[0m \u001B[38;5;66;03m# Use cast as we know we will get back a DatetimeLikeArray or DTScalar,\u001B[39;00m\n\u001B[1;32m 367\u001B[0m \u001B[38;5;66;03m# but skip evaluating the Union at runtime for performance\u001B[39;00m\n\u001B[1;32m 368\u001B[0m \u001B[38;5;66;03m# (see https://github.com/pandas-dev/pandas/pull/44624)\u001B[39;00m\n\u001B[1;32m 369\u001B[0m result \u001B[38;5;241m=\u001B[39m cast(\n\u001B[0;32m--> 370\u001B[0m \u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mUnion[DatetimeLikeArrayT, DTScalarOrNaT]\u001B[39m\u001B[38;5;124m\"\u001B[39m, \u001B[38;5;28;43msuper\u001B[39;49m\u001B[43m(\u001B[49m\u001B[43m)\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[38;5;21;43m__getitem__\u001B[39;49m\u001B[43m(\u001B[49m\u001B[43mkey\u001B[49m\u001B[43m)\u001B[49m\n\u001B[1;32m 371\u001B[0m )\n\u001B[1;32m 372\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m lib\u001B[38;5;241m.\u001B[39mis_scalar(result):\n\u001B[1;32m 373\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m result\n", + "File \u001B[0;32m/opt/anaconda3/envs/mstables/lib/python3.8/site-packages/pandas/core/arrays/_mixins.py:272\u001B[0m, in \u001B[0;36mNDArrayBackedExtensionArray.__getitem__\u001B[0;34m(self, key)\u001B[0m\n\u001B[1;32m 266\u001B[0m \u001B[38;5;28;01mdef\u001B[39;00m \u001B[38;5;21m__getitem__\u001B[39m(\n\u001B[1;32m 267\u001B[0m \u001B[38;5;28mself\u001B[39m: NDArrayBackedExtensionArrayT,\n\u001B[1;32m 268\u001B[0m key: PositionalIndexer2D,\n\u001B[1;32m 269\u001B[0m ) \u001B[38;5;241m-\u001B[39m\u001B[38;5;241m>\u001B[39m NDArrayBackedExtensionArrayT \u001B[38;5;241m|\u001B[39m Any:\n\u001B[1;32m 270\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m lib\u001B[38;5;241m.\u001B[39mis_integer(key):\n\u001B[1;32m 271\u001B[0m \u001B[38;5;66;03m# fast-path\u001B[39;00m\n\u001B[0;32m--> 272\u001B[0m result \u001B[38;5;241m=\u001B[39m \u001B[38;5;28;43mself\u001B[39;49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43m_ndarray\u001B[49m\u001B[43m[\u001B[49m\u001B[43mkey\u001B[49m\u001B[43m]\u001B[49m\n\u001B[1;32m 273\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mndim \u001B[38;5;241m==\u001B[39m \u001B[38;5;241m1\u001B[39m:\n\u001B[1;32m 274\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_box_func(result)\n", + "\u001B[0;31mIndexError\u001B[0m: index 0 is out of bounds for axis 0 with size 0" + ] + } + ], + "execution_count": 9 }, { "cell_type": "markdown", @@ -272,185 +407,30 @@ }, { "cell_type": "code", - "execution_count": 8, - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2025-01-24T08:25:33.580884Z", + "start_time": "2025-01-24T08:23:21.075160Z" + } + }, + "source": [ + "#df_quote.head()\n", + "df_quote()" + ], "outputs": [ { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
ticker_idexchange_idopenpricelastpriceday_hiday_lo_52wk_hi_52wk_loyieldlastvolavevolfpepbpspccurrency_idlastdate
0137415.6515.6815.6915.6417.7212.162.012747.02983.0NaN2.46.416.2104.02019-05-07
1237415.6515.5215.6915.5217.6813.682.037800.02280.0NaN2.46.316.0104.02019-05-06
2337415.6115.3115.6115.3117.7911.992.001013.011124.0NaN2.46.416.2104.02019-05-07
34482102.97101.78103.94101.38115.1187.871.16971891.0758551.022.93.54.423.6104.02019-05-07
4510.0000000NaN500.0342.0NaNNaNNaNNaN104.02019-04-25
\n", - "
" - ], - "text/plain": [ - " ticker_id exchange_id openprice lastprice day_hi day_lo _52wk_hi \\\n", - "0 1 374 15.65 15.68 15.69 15.64 17.72 \n", - "1 2 374 15.65 15.52 15.69 15.52 17.68 \n", - "2 3 374 15.61 15.31 15.61 15.31 17.79 \n", - "3 4 482 102.97 101.78 103.94 101.38 115.11 \n", - "4 5 1 0.00 0 0 0 0 \n", - "\n", - " _52wk_lo yield lastvol avevol fpe pb ps pc currency_id \\\n", - "0 12.16 2.01 2747.0 2983.0 NaN 2.4 6.4 16.2 104.0 \n", - "1 13.68 2.03 7800.0 2280.0 NaN 2.4 6.3 16.0 104.0 \n", - "2 11.99 2.00 1013.0 11124.0 NaN 2.4 6.4 16.2 104.0 \n", - "3 87.87 1.16 971891.0 758551.0 22.9 3.5 4.4 23.6 104.0 \n", - "4 0 NaN 500.0 342.0 NaN NaN NaN NaN 104.0 \n", - "\n", - " lastdate \n", - "0 2019-05-07 \n", - "1 2019-05-06 \n", - "2 2019-05-07 \n", - "3 2019-05-07 \n", - "4 2019-04-25 " - ] - }, - "execution_count": 8, - "metadata": {}, - "output_type": "execute_result" + "ename": "NameError", + "evalue": "name 'df_quote' is not defined", + "output_type": "error", + "traceback": [ + "\u001B[0;31m---------------------------------------------------------------------------\u001B[0m", + "\u001B[0;31mNameError\u001B[0m Traceback (most recent call last)", + "Cell \u001B[0;32mIn[3], line 2\u001B[0m\n\u001B[1;32m 1\u001B[0m \u001B[38;5;66;03m#df_quote.head()\u001B[39;00m\n\u001B[0;32m----> 2\u001B[0m \u001B[43mdf_quote\u001B[49m()\n", + "\u001B[0;31mNameError\u001B[0m: name 'df_quote' is not defined" + ] } ], - "source": [ - "df_quote.head()" - ] + "execution_count": 3 }, { "cell_type": "markdown", diff --git a/requirements.txt b/requirements.txt index e69de29..e58423b 100644 --- a/requirements.txt +++ b/requirements.txt @@ -0,0 +1,15 @@ +zlib~=1.2.13 +fetch +time +os +re +numpy +requests +pandas +sqlite3 +sqlalchemy +requests +json +zlib +parse +bs4 \ No newline at end of file diff --git a/dataframes.py b/src/dataframes.py similarity index 99% rename from dataframes.py rename to src/dataframes.py index 3b1df14..33f9736 100644 --- a/dataframes.py +++ b/src/dataframes.py @@ -9,7 +9,7 @@ class DataFrames(): - db_file = 'db/mstables.sqlite' # Standard db file name + db_file = 'data/mstables.sqlite' # Standard db file name def __init__(self, file = db_file): diff --git a/src/fetch.py b/src/fetch.py index f298063..7cb7315 100644 --- a/src/fetch.py +++ b/src/fetch.py @@ -100,17 +100,17 @@ def get_ticker(u, typ): # Insert list of countries into Countries table sql = '''INSERT OR IGNORE INTO Countries (country, a2_iso, a3_un) VALUES (?, ?, ?)''' - cur.executemany(sql, csv_content('input/ctycodes.csv', 3)) + cur.executemany(sql, csv_content('../input/ctycodes.csv', 3)) # Insert list of currencies into Currencies table sql = '''INSERT OR IGNORE INTO Currencies (currency, currency_code) VALUES (?, ?)''' - cur.executemany(sql, csv_content('input/symbols.csv', 2)) + cur.executemany(sql, csv_content('../input/symbols.csv', 2)) # Insert list of types into SecurityTypes table sql = '''INSERT OR IGNORE INTO SecurityTypes (security_type_code, security_type) VALUES (?, ?)''' - cur.executemany(sql, csv_content('input/ms_investment-types.csv', 2)) + cur.executemany(sql, csv_content('../input/ms_investment-types.csv', 2)) # Insert list of api URLs into URLs table for k, v in apis.items(): diff --git a/parse.py b/src/parse.py similarity index 99% rename from parse.py rename to src/parse.py index a6225d0..5ebfc1e 100644 --- a/parse.py +++ b/src/parse.py @@ -1,10 +1,9 @@ from bs4 import BeautifulSoup as bs -from importlib import reload #Comment out once done using import datetime as DT from io import StringIO import pandas as pd -import numpy as np -import fetch, sqlite3, time, json, zlib, csv, sys, re +import sqlite3, time, json, zlib, re +from src import fetch # Manage database connection and fetch data to be parsed @@ -277,7 +276,7 @@ def parse_2(cur, ticker_id, exch_id, data): # Update Tickers table with parsed data sql = fetch.sql_update_record('Master', {'industry_id':industry_id, 'stock_type_id':stype_id, 'fyend_id':fyend_id, 'style_id':style_id}, - {'ticker_id':ticker_id, 'exchange_id':exch_id}) + {'ticker_id':ticker_id, 'exchange_id':exch_id}) fetch.db_execute(cur, sql) return 200 @@ -831,7 +830,7 @@ def parse_9(cur, ticker_id, exch_id, data): info['ticker_id'] = ticker_id info['exchange_id'] = exch_id sql = fetch.sql_insert('InsiderTransactions', - tuple(info.keys()), tuple(info.values())) + tuple(info.keys()), tuple(info.values())) fetch.db_execute(cur, sql) return 200