migrated backend in prep to move to 3.12 #16

Open

wants to merge 9 commits into master
File renamed without changes.
File renamed without changes.
4 changes: 2 additions & 2 deletions main.py
@@ -1,9 +1,9 @@
 #!/usr/bin/env python

 from shutil import copyfile
 from datetime import datetime
-from importlib import reload
-import fetch, time, os, re, sqlite3
+import time, os, re
+from src import fetch

 __author__ = "Caio Brandao"
 __copyright__ = "Copyright 2019+, Caio Brandao"
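A note for reviewers: `from src import fetch` only resolves when Python can see `src/` as a package from the directory main.py runs in. A minimal sketch of the layout this import style assumes (the `__init__.py` is an assumption; this diff does not show one):

```python
# Layout assumed by `from src import fetch` when main.py runs from the
# repository root (src/__init__.py is assumed, not shown in this diff):
#
#   main.py
#   input/
#   data/
#   src/
#       __init__.py
#       fetch.py
#       parse.py
#       dataframes.py

from src import fetch   # resolves to src/fetch.py

print(fetch.__name__)    # -> 'src.fetch'
```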
368 changes: 174 additions & 194 deletions data_overview.ipynb → notebooks/data_overview.ipynb

Large diffs are not rendered by default.

5 changes: 5 additions & 0 deletions requirements.txt
@@ -0,0 +1,5 @@
+numpy
+pandas
+requests
+sqlalchemy
+beautifulsoup4
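Standard-library modules (`time`, `os`, `re`, `sqlite3`, `json`, `zlib`) ship with Python, and `fetch`/`parse` are this repo's own modules, so only the third-party packages above belong in requirements.txt. A quick sanity check that the installed environment matches the list (note that `beautifulsoup4` is imported under the name `bs4`):

```python
# Verify that each third-party dependency from requirements.txt imports
# cleanly; stdlib modules need no entry because they ship with Python.
import importlib

for name in ('numpy', 'pandas', 'requests', 'sqlalchemy', 'bs4'):
    module = importlib.import_module(name)
    print(name, getattr(module, '__version__', '(version not exposed)'))
```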
2 changes: 1 addition & 1 deletion dataframes.py → src/dataframes.py
@@ -9,7 +9,7 @@

 class DataFrames():

-    db_file = 'db/mstables.sqlite' # Standard db file name
+    db_file = 'data/mstables.sqlite' # Standard db file name

     def __init__(self, file = db_file):

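Since `db_file` is a plain relative path, `DataFrames()` now looks for `data/mstables.sqlite` relative to the current working directory, so callers must either run from the repository root or pass a path explicitly. A usage sketch based on the signature shown above:

```python
from src.dataframes import DataFrames

# Default: resolves 'data/mstables.sqlite' against the current working
# directory, so this only works when run from the repository root.
frames = DataFrames()

# Explicit: pass any other database file, e.g. from a notebook that
# lives one directory down in notebooks/.
frames = DataFrames(file='../data/mstables.sqlite')
```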
48 changes: 27 additions & 21 deletions fetch.py → src/fetch.py
@@ -100,17 +100,17 @@ def get_ticker(u, typ):

     # Insert list of countries into Countries table
     sql = '''INSERT OR IGNORE INTO Countries
         (country, a2_iso, a3_un) VALUES (?, ?, ?)'''
-    cur.executemany(sql, csv_content('input/ctycodes.csv', 3))
+    cur.executemany(sql, csv_content('../input/ctycodes.csv', 3))

     # Insert list of currencies into Currencies table
     sql = '''INSERT OR IGNORE INTO Currencies (currency, currency_code)
         VALUES (?, ?)'''
-    cur.executemany(sql, csv_content('input/symbols.csv', 2))
+    cur.executemany(sql, csv_content('../input/symbols.csv', 2))

     # Insert list of types into SecurityTypes table
     sql = '''INSERT OR IGNORE INTO SecurityTypes
         (security_type_code, security_type) VALUES (?, ?)'''
-    cur.executemany(sql, csv_content('input/ms_investment-types.csv', 2))
+    cur.executemany(sql, csv_content('../input/ms_investment-types.csv', 2))

     # Insert list of api URLs into URLs table
     for k, v in apis.items():
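The switch from `input/...` to `../input/...` keeps these loaders working when the interpreter starts inside `src/`, but relative paths resolve against the process working directory, not the module's location, so running from the repository root would break them again. A hypothetical helper (`input_path` is not part of this PR) that anchors the lookup to the file itself:

```python
import os

# Resolve the top-level input/ directory relative to this module
# (src/fetch.py) instead of the current working directory.
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
INPUT_DIR = os.path.normpath(os.path.join(BASE_DIR, os.pardir, 'input'))

def input_path(filename):
    """Return an absolute path to a CSV under input/, CWD-independent."""
    return os.path.join(INPUT_DIR, filename)
```

With that helper, the calls above become `csv_content(input_path('ctycodes.csv'), 3)` and work no matter where Python is launched from.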
@@ -291,23 +291,24 @@ def fetch(db_file):

     # Fetch data from API's using multiprocessing.Pool
     results = []
-    while True:
-        try:
-            with mp.Pool(pool_size) as p:
-                #r = p.imap_unordered(fetch_api, items)
-                #r = p.map(fetch_api, items)
-                r = p.imap(fetch_api, items)
-                for turn in range(len(items)):
-                    try:
-                        results.append(r.next(timeout=5))
-                    except mp.context.TimeoutError:
-                        pass
-            break
-        except KeyboardInterrupt:
-            print('\nGoodbye!')
-            exit()
-        except:
-            raise
+    try:
+        with mp.Pool(pool_size) as p:
+            # Use .map_async() instead of .imap() for better stability
+            r = p.map_async(fetch_api, items)
+            try:
+                results = r.get(timeout=60)  # Increased timeout
+            except mp.TimeoutError:
+                p.terminate()
+                print("Pool timed out")
+    except KeyboardInterrupt:
+        print('\nGoodbye!')
+        exit()
+    except Exception as e:
+        print(f"Pool error: {e}")
+        raise
+
+    # Filter out None results after pool completes
+    results = [r for r in results if r is not None]

     # Fetch data from API's without multiprocessing.Pool
     '''for item in url_info:

@@ -373,7 +374,12 @@ def fetch_api(url_info):

     t0 = time.time()

     # Unpack variables
-    url_id, url, ticker_id, exch_id = url_info
+    try:
+        url_id, url, ticker_id, exch_id = url_info
+    except (TypeError, ValueError) as e:
+        print(f"Error unpacking url_info: {e}")
+        return None
+
     num = ticker_list[url_id]['{}:{}'.format(exch_id, ticker_id)]
     ct = ticker_count[url_id]
     print_progress(url_id, num, ct)
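The new pool logic swaps per-item `imap(...).next(timeout=5)` polling for a single `map_async` call whose `get(timeout=60)` covers the whole batch; a single slow URL no longer silently drops that one item, it now times out the entire batch. A self-contained sketch of the pattern (the worker body, items, and timeout value are illustrative, not taken from the PR):

```python
import multiprocessing as mp

def fetch_api(url_info):
    """Stand-in worker; the real one downloads and parses API data."""
    url_id, url, ticker_id, exch_id = url_info
    return (url_id, ticker_id)

if __name__ == '__main__':
    items = [(1, 'https://example.com/a', 10, 2),
             (2, 'https://example.com/b', 11, 2)]
    results = []
    try:
        with mp.Pool(4) as p:
            r = p.map_async(fetch_api, items)
            try:
                # Blocks until every worker finishes, or raises
                # mp.TimeoutError once the batch-wide timeout expires.
                results = r.get(timeout=60)
            except mp.TimeoutError:
                p.terminate()
                print('Pool timed out')
    except KeyboardInterrupt:
        print('\nGoodbye!')
    # Workers that bailed out return None; drop those entries.
    results = [x for x in results if x is not None]
    print(results)
```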
9 changes: 4 additions & 5 deletions parse.py → src/parse.py
@@ -1,10 +1,9 @@
 from bs4 import BeautifulSoup as bs
-from importlib import reload #Comment out once done using
 import datetime as DT
 from io import StringIO
 import pandas as pd
 import numpy as np
-import fetch, sqlite3, time, json, zlib, csv, sys, re
+import sqlite3, time, json, zlib, re
+from src import fetch


 # Manage database connection and fetch data to be parsed
@@ -277,7 +276,7 @@ def parse_2(cur, ticker_id, exch_id, data):

     # Update Tickers table with parsed data
     sql = fetch.sql_update_record('Master', {'industry_id':industry_id,
         'stock_type_id':stype_id, 'fyend_id':fyend_id, 'style_id':style_id},
-            {'ticker_id':ticker_id, 'exchange_id':exch_id})
+        {'ticker_id':ticker_id, 'exchange_id':exch_id})
     fetch.db_execute(cur, sql)

     return 200
@@ -831,7 +830,7 @@ def parse_9(cur, ticker_id, exch_id, data):

     info['ticker_id'] = ticker_id
     info['exchange_id'] = exch_id
     sql = fetch.sql_insert('InsiderTransactions',
-            tuple(info.keys()), tuple(info.values()))
+        tuple(info.keys()), tuple(info.values()))
     fetch.db_execute(cur, sql)

     return 200
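`parse_9` hands `fetch.sql_insert` a table name plus the dict's keys and values. The helper's body isn't shown in this diff, so the following is a hypothetical sketch of that pattern using sqlite3 placeholder binding; the real helper may instead return a fully rendered statement, since `db_execute` receives only `sql`:

```python
import sqlite3

def sql_insert(table, columns, values):
    # Hypothetical stand-in for fetch.sql_insert: build a parameterized
    # INSERT from the column names and return it with its bound values.
    cols = ', '.join(columns)
    marks = ', '.join('?' for _ in columns)
    return 'INSERT INTO {} ({}) VALUES ({})'.format(table, cols, marks), values

info = {'ticker_id': 42, 'exchange_id': 7, 'shares': 1500}
sql, params = sql_insert('InsiderTransactions',
                         tuple(info.keys()), tuple(info.values()))

conn = sqlite3.connect(':memory:')
conn.execute('CREATE TABLE InsiderTransactions'
             ' (ticker_id INT, exchange_id INT, shares INT)')
conn.execute(sql, params)  # placeholders keep the values safely bound
conn.commit()
```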