Skip to content

Commit

Permalink
Fixes petercerno#6 and petercerno#7 url problems. Also adds unit test…
Browse files Browse the repository at this point in the history
… and exception handling for passing empty string and invalid ticker symbol
  • Loading branch information
linwoodc3 committed Jul 23, 2017
1 parent 1d9857a commit 3df0634
Show file tree
Hide file tree
Showing 5 changed files with 190 additions and 45 deletions.
41 changes: 41 additions & 0 deletions .travis.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
language: python
# Cache pip downloads between builds (single `cache` key: YAML forbids
# duplicate mapping keys, so `pip: true` and `directories` are merged here).
cache:
  pip: true
  directories:
    - $HOME/.cache/pip

matrix:
  include:
    - os: linux
      sudo: required
      python: 3.4
      env: GDELT=py34
    - os: linux
      sudo: required
      python: 3.5
      env: GDELT=py35
    - os: linux
      sudo: required
      python: 3.6
      env: GDELT=py36
    # macOS images have no native Python support; travis/install.sh
    # provisions the interpreter selected via the GDELT env var.
    - os: osx
      language: generic
      env: GDELT=py34
    - os: osx
      language: generic
      env: GDELT=py35
    - os: osx
      language: generic
      env: GDELT=py36

install:
  - source travis/install.sh
  - pip install pip -U
  - pip install pytest-cov

script:
  - py.test --cov=./

before_cache:
  # Drop the volatile log so it never invalidates the cache fingerprint.
  - rm -f $HOME/.cache/pip/log/debug.log

after_success:
  - codecov
24 changes: 21 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,16 +13,16 @@ Motivation

Good Morning is intended to be used as an extension to [QSToolKit (QSTK)](http://wiki.quantsoftware.org/index.php?title=QuantSoftware_ToolKit) library. By using [QSTK](http://wiki.quantsoftware.org/index.php?title=QuantSoftware_ToolKit) you can easily download historical stock market data from [Yahoo Finance](http://finance.yahoo.com/). You can also download fundamental financial data from [Compustat](https://www.capitaliq.com/home/what-we-offer/information-you-need/financials-valuation/compustat-financials.aspx). However, most individuals and institutions do not have access to [Compustat](https://www.capitaliq.com/home/what-we-offer/information-you-need/financials-valuation/compustat-financials.aspx). Good Morning attempts to mitigate this limitation by providing a very simple Python interface for downloading fundamental financial data from [financials.morningstar.com](http://financials.morningstar.com/).

Example
Example of Downloading Key Ratios from MorningStar
=======

import good_morning as gm
import morningstar as gm
kr = gm.KeyRatiosDownloader()
kr_frames = kr.download('AAPL')

The variable `kr_frames` now holds an array of [`pandas.DataFrame`](http://pandas.pydata.org/pandas-docs/dev/generated/pandas.DataFrame.html)s containing the key ratios for the morningstar ticker [`AAPL`](http://financials.morningstar.com/ratios/r.html?t=AAPL&region=usa&culture=en-US).

print kr_frames[0]
print (kr_frames[0])

Outputs:

Expand All @@ -43,6 +43,24 @@ If we specify the MySQL connection `conn` the retrieved data will be uploaded to
host = DB_HOST, user = DB_USER, passwd = DB_PASS, db = DB_NAME)
kr_frames = kr.download('AAPL', conn)


Example of Downloading Financials (e.g. *income statement, balance sheet, cash flow*) from MorningStar
=======

import morningstar as gm
kr = gm.FinancialsDownloader()
kr_fins = kr.download('AAPL')

Unlike `KeyRatiosDownloader`, which returns a list of frames, `kr_fins` here holds a dictionary containing the financials for the morningstar ticker [`AAPL`](http://financials.morningstar.com/ratios/r.html?t=AAPL&region=usa&culture=en-US). The financials **may differ** from company to company.

print (kr_fins.keys())

Output:

dict_keys(['income_statement', 'balance_sheet', 'cash_flow', 'period_range', 'fiscal_year_end', 'currency'])



Every [`pandas.DataFrame`](http://pandas.pydata.org/pandas-docs/dev/generated/pandas.DataFrame.html) in the array `kr_frames` will be uploaded to a different database table. In our case the following tables will be created:

`morningstar_key_balance_sheet_items_in_percent`
Expand Down
98 changes: 56 additions & 42 deletions morningstar/good_morning.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,18 +33,19 @@
from bs4 import BeautifulSoup, Tag
from datetime import date


class KeyRatiosDownloader(object):
u"""Downloads key ratios from http://financials.morningstar.com/
"""

def __init__(self, table_prefix = u'morningstar_'):
def __init__(self, table_prefix=u'morningstar_'):
u"""Constructs the KeyRatiosDownloader instance.
:param table_prefix: Prefix of the MySQL tables.
"""
self._table_prefix = table_prefix

def download(self, ticker, conn = None):
def download(self, ticker, conn=None):
u"""Downloads and returns key ratios for the given Morningstar ticker.
Downloads and returns an array of pandas.DataFrames containing the key
Expand All @@ -56,10 +57,10 @@ def download(self, ticker, conn = None):
:param conn: MySQL connection.
:return: List of pandas.DataFrames containing the key ratios.
"""
url = (ur'http://financials.morningstar.com/ajax/exportKR2CSV.html?' +
ur'&callback=?&t={0}&region=usa&culture=en-US&cur=USD'.format(
url = (r'http://financials.morningstar.com/ajax/exportKR2CSV.html?' +
r'&callback=?&t={0}&region=usa&culture=en-US&cur=USD'.format(
ticker))
with urllib2.urlopen(url) as response:
with urllib.request.urlopen(url) as response:
tables = self._parse_tables(response)
response_structure = [
# Original Name, New pandas.DataFrame Name
Expand All @@ -77,7 +78,18 @@ def download(self, ticker, conn = None):
(u'Key Ratios -> Financial Health',
u'Key Liquidity/Financial Health'),
(u'Key Ratios -> Efficiency Ratios', u'Key Efficiency Ratios')]

frames = self._parse_frames(tables, response_structure)

if len(ticker) == 0:
raise ValueError("You did not enter a ticker symbol. Please"
" try again.")
elif frames == "MorningStar could not find the ticker":

raise ValueError("MorningStar cannot find the ticker symbol "
"you entered or it is INVALID. Please try "
"again.")

currency = re.match(u'^.* ([A-Z]+) Mil$',
frames[0].index[0]).group(1)
frames[0].index.name += u' ' + currency
Expand All @@ -96,7 +108,7 @@ def _parse_tables(response):
"""
# Regex pattern used to recognize csv lines containing financial data.
num_commas = 5
pat_commas = ur'(.*,){%d,}' % num_commas
pat_commas = r'(.*,){%d,}' % num_commas
# Resulting array of pairs (table_name, table_frame).
tables = []
table_name = None
Expand Down Expand Up @@ -126,9 +138,11 @@ def _parse_frames(tables, response_structure):
:param response_structure: List of pairs (expected table name, new name
assigned to the corresponding (processed) pandas.DataFrame).
"""
if len(tables) == 0:
return ("MorningStar could not find the ticker")
period_start = tables[0][1].ix[0][1]
period_month = pd.datetime.strptime(period_start, u'%Y-%m').month
#period_freq = pd.datetools.YearEnd(month=period_month)
# period_freq = pd.datetools.YearEnd(month=period_month)
period_freq = pd.tseries.offsets.YearEnd(month=period_month)
frames = []
for index, (check_name, frame_name) in enumerate(response_structure):
Expand Down Expand Up @@ -157,7 +171,7 @@ def _process_frame(frame, frame_name, period_start,
periods=len(output_frame.ix[0]),
freq=period_freq)
output_frame.columns.name = u'Period'
if re.match(ur'^\d{4}-\d{2}$', output_frame.ix[0][0]):
if re.match(r'^\d{4}-\d{2}$', output_frame.ix[0][0]):
output_frame.drop(output_frame.index[0], inplace=True)
output_frame.replace(u',', u'', regex=True, inplace=True)
output_frame.replace(u'^\s*$', u'NaN', regex=True, inplace=True)
Expand Down Expand Up @@ -188,8 +202,8 @@ def _get_db_name(name):
.replace(u'/', u' per ')
.replace(u'&', u' and ')
.replace(u'%', u' percent '))
name = re.sub(ur'[^a-z0-9]', u' ', name)
name = re.sub(ur'\s+', u' ', name).strip()
name = re.sub(r'[^a-z0-9]', u' ', name)
name = re.sub(r'\s+', u' ', name).strip()
return name.replace(u' ', u'_')

def _get_db_table_name(self, frame):
Expand Down Expand Up @@ -237,24 +251,24 @@ def _get_db_replace_values(self, ticker, frame):
u'REPLACE INTO `%s`\n' % self._get_db_table_name(frame) +
u' (%s)\nVALUES\n' % u',\n '.join(columns) +
u',\n'.join([u'("' + ticker + u'", "' + column.strftime(u'%Y-%m-%d') +
u'", ' +
u', '.join([u'NULL' if np.isnan(x) else u'%.5f' % x
for x in frame[column].values]) +
u')' for column in frame.columns]))
u'", ' +
u', '.join([u'NULL' if np.isnan(x) else u'%.5f' % x
for x in frame[column].values]) +
u')' for column in frame.columns]))


class FinancialsDownloader(object):
u"""Downloads financials from http://financials.morningstar.com/
"""

def __init__(self, table_prefix = u'morningstar_'):
def __init__(self, table_prefix=u'morningstar_'):
u"""Constructs the FinancialsDownloader instance.
:param table_prefix: Prefix of the MySQL tables.
"""
self._table_prefix = table_prefix

def download(self, ticker, conn = None):
def download(self, ticker, conn=None):
u"""Downloads and returns a dictionary containing pandas.DataFrames
representing the financials (i.e. income statement, balance sheet,
cash flow) for the given Morningstar ticker. If the MySQL connection
Expand All @@ -268,9 +282,9 @@ def download(self, ticker, conn = None):
"""
result = {}
for report_type, table_name in [
(u'is', u'income_statement'),
(u'bs', u'balance_sheet'),
(u'cf', u'cash_flow')]:
(u'is', u'income_statement'),
(u'bs', u'balance_sheet'),
(u'cf', u'cash_flow')]:
frame = self._download(ticker, report_type)
result[table_name] = frame
if conn:
Expand All @@ -292,15 +306,15 @@ def _download(self, ticker, report_type):
:return pandas.DataFrame corresponding to the given Morningstar ticker
and the given type of the report.
"""
url = (ur'http://financials.morningstar.com/ajax/' +
ur'ReportProcess4HtmlAjax.html?&t=' + ticker +
ur'&region=usa&culture=en-US&cur=USD' +
ur'&reportType=' + report_type + ur'&period=12' +
ur'&dataType=A&order=asc&columnYear=5&rounding=3&view=raw')
with urllib2.urlopen(url) as response:
url = (r'http://financials.morningstar.com/ajax/' +
r'ReportProcess4HtmlAjax.html?&t=' + ticker +
r'&region=usa&culture=en-US&cur=USD' +
r'&reportType=' + report_type + r'&period=12' +
r'&dataType=A&order=asc&columnYear=5&rounding=3&view=raw')
with urllib.request.urlopen(url) as response:
json_text = response.read().decode(u'utf-8')
json_data = json.loads(json_text)
result_soup = BeautifulSoup(json_data[u'result'],u'html.parser')
result_soup = BeautifulSoup(json_data[u'result'], u'html.parser')
return self._parse(result_soup)

def _parse(self, soup):
Expand All @@ -321,7 +335,7 @@ def _parse(self, soup):
self._period_range = pd.period_range(
year.div.text, periods=len(self._year_ids),
# freq=pd.datetools.YearEnd(month=period_month))
freq = pd.tseries.offsets.YearEnd(month=period_month))
freq=pd.tseries.offsets.YearEnd(month=period_month))
unit = left.find(u'div', {u'id': u'unitsAndFiscalYear'})
self._fiscal_year_end = int(unit.attrs[u'fyenumber'])
self._currency = unit.attrs[u'currency']
Expand All @@ -334,7 +348,7 @@ def _parse(self, soup):
columns=[u'parent_index', u'title'] + list(
self._period_range))

def _read_labels(self, root_node, parent_label_index = None):
def _read_labels(self, root_node, parent_label_index=None):
u"""Recursively reads labels from the parsed HTML response.
"""
for node in root_node:
Expand All @@ -344,7 +358,7 @@ def _read_labels(self, root_node, parent_label_index = None):
node.attrs[u'id'].startswith(u'label') and
not node.attrs[u'id'].endswith(u'padding') and
(not node.has_attr(u'style') or
u'display:none' not in node.attrs[u'style'])):
u'display:none' not in node.attrs[u'style'])):
label_id = node.attrs[u'id'][6:]
label_title = (node.div.attrs[u'title']
if node.div.has_attr(u'title')
Expand All @@ -353,8 +367,8 @@ def _read_labels(self, root_node, parent_label_index = None):
u'id': label_id,
u'index': self._label_index,
u'parent_index': (parent_label_index
if parent_label_index is not None
else self._label_index),
if parent_label_index is not None
else self._label_index),
u'title': label_title})
self._label_index += 1

Expand All @@ -368,14 +382,14 @@ def _read_data(self, root_node):
node.attrs[u'id'].startswith(u'data') and
not node.attrs[u'id'].endswith(u'padding') and
(not node.has_attr(u'style') or
u'display:none' not in node.attrs[u'style'])):
u'display:none' not in node.attrs[u'style'])):
data_id = node.attrs[u'id'][5:]
while (self._data_index < len(self._data) and
self._data[self._data_index][u'id'] != data_id):
self._data[self._data_index][u'id'] != data_id):
# In some cases we do not have data for all labels.
self._data_index += 1
assert(self._data_index < len(self._data) and
self._data[self._data_index][u'id'] == data_id)
assert (self._data_index < len(self._data) and
self._data[self._data_index][u'id'] == data_id)
for (i, child) in enumerate(node.children):
try:
value = float(child.attrs[u'rawvalue'])
Expand Down Expand Up @@ -466,13 +480,13 @@ def _get_db_replace_values(ticker, frame,
u'REPLACE INTO `%s`\n' % table_name +
u' (%s)\nVALUES\n' % u', '.join(columns) +
u',\n'.join([u'("' + ticker + u'", %d, %d, "%s", ' %
(index, frame.ix[index, u'parent_index'],
frame.ix[index, u'title']) +
u', '.join(
[u'NULL' if np.isnan(frame.ix[index, period])
else u'%.5f' % frame.ix[index, period]
for period in frame.columns[2:]]) + u')'
for index in frame.index]))
(index, frame.ix[index, u'parent_index'],
frame.ix[index, u'title']) +
u', '.join(
[u'NULL' if np.isnan(frame.ix[index, period])
else u'%.5f' % frame.ix[index, period]
for period in frame.columns[2:]]) + u')'
for index in frame.index]))


def _db_table_exists(table_name, conn):
Expand Down
42 changes: 42 additions & 0 deletions tests/test_download.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-


from unittest import TestCase

from morningstar import good_morning as gm
from morningstar.good_morning import KeyRatiosDownloader

class TestDownloadReturns(TestCase):
    """Tests for ``KeyRatiosDownloader.download`` input validation.

    NOTE(review): these tests hit financials.morningstar.com over the
    network, so they require connectivity and a live endpoint.
    """

    def test_downloadreturn(self):
        """A valid ticker returns the expected 11 key-ratio frames."""
        kr = gm.KeyRatiosDownloader()
        frames = kr.download('aapl')
        # msg is displayed only on failure, so phrase it as the failure.
        self.assertEqual(len(frames), 11,
                         "Download of a valid ticker did not return "
                         "11 key-ratio frames")

    def test_download_fail_empty(self):
        """An empty ticker string raises ValueError with a clear message."""
        kr = gm.KeyRatiosDownloader()
        exp = "You did not enter a ticker symbol. Please try again."
        # download() raises ValueError for bad input; assert the narrow
        # exception type rather than any Exception.
        with self.assertRaises(ValueError) as context:
            kr.download('')
        self.assertEqual(exp, str(context.exception),
                         "Empty ticker did not raise the expected "
                         "ValueError message")

    def test_download_fail_invalid(self):
        """An unknown ticker symbol raises ValueError with a clear message."""
        kr = gm.KeyRatiosDownloader()
        exp = ("MorningStar cannot find the ticker symbol you entered "
               "or it is INVALID. Please try again.")
        with self.assertRaises(ValueError) as context:
            kr.download('nothing')
        self.assertEqual(exp, str(context.exception),
                         "Invalid ticker did not raise the expected "
                         "ValueError message")



Loading

0 comments on commit 3df0634

Please sign in to comment.