Skip to content

Commit

Permalink
Fixes petercerno#6 and petercerno#7 url problems. Also adds unit test…
Browse files Browse the repository at this point in the history
… and exception handling for passing empty string and invalid ticker symbol
  • Loading branch information
linwoodc3 committed Jul 23, 2017
1 parent 1d9857a commit 3df0634
Show file tree
Hide file tree
Showing 5 changed files with 190 additions and 45 deletions.
41 changes: 41 additions & 0 deletions .travis.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
language: python
# Cache pip downloads between builds (single `cache` key: YAML forbids
# duplicate mapping keys, so `pip: true` and `directories` are merged here).
cache:
  pip: true
  directories:
    - $HOME/.cache/pip

matrix:
  include:
    - os: linux
      sudo: required
      python: 3.4
      env: GDELT=py34
    - os: linux
      sudo: required
      python: 3.5
      env: GDELT=py35
    - os: linux
      sudo: required
      python: 3.6
      env: GDELT=py36
    # macOS images have no native Python support; travis/install.sh
    # provisions the interpreter selected via the GDELT env var.
    - os: osx
      language: generic
      env: GDELT=py34
    - os: osx
      language: generic
      env: GDELT=py35
    - os: osx
      language: generic
      env: GDELT=py36

install:
  - source travis/install.sh
  - pip install pip -U
  - pip install pytest-cov

script:
  - py.test --cov=./

before_cache:
  # Drop the volatile log so it never invalidates the cache fingerprint.
  - rm -f $HOME/.cache/pip/log/debug.log

after_success:
  - codecov
24 changes: 21 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,16 +13,16 @@ Motivation

Good Morning is intended to be used as an extension to [QSToolKit (QSTK)](http://wiki.quantsoftware.org/index.php?title=QuantSoftware_ToolKit) library. By using [QSTK](http://wiki.quantsoftware.org/index.php?title=QuantSoftware_ToolKit) you can easily download historical stock market data from [Yahoo Finance](http://finance.yahoo.com/). You can also download fundamental financial data from [Compustat](https://www.capitaliq.com/home/what-we-offer/information-you-need/financials-valuation/compustat-financials.aspx). However, most individuals and institutions do not have access to [Compustat](https://www.capitaliq.com/home/what-we-offer/information-you-need/financials-valuation/compustat-financials.aspx). Good Morning attempts to mitigate this limitation by providing a very simple Python interface for downloading fundamental financial data from [financials.morningstar.com](http://financials.morningstar.com/).

Example
Example of Downloading Key Ratios from MorningStar
=======

import good_morning as gm
import morningstar as gm
kr = gm.KeyRatiosDownloader()
kr_frames = kr.download('AAPL')

The variable `kr_frames` now holds an array of [`pandas.DataFrame`](http://pandas.pydata.org/pandas-docs/dev/generated/pandas.DataFrame.html)s containing the key ratios for the morningstar ticker [`AAPL`](http://financials.morningstar.com/ratios/r.html?t=AAPL&region=usa&culture=en-US).

print kr_frames[0]
print (kr_frames[0])

Outputs:

Expand All @@ -43,6 +43,24 @@ If we specify the MySQL connection `conn` the retrieved data will be uploaded to
host = DB_HOST, user = DB_USER, passwd = DB_PASS, db = DB_NAME)
kr_frames = kr.download('AAPL', conn)


Example of Downloading Financials (e.g. *income statement, balance sheet, cash flow*) from MorningStar
=======

import morningstar as gm
kr = gm.FinancialsDownloader()
kr_fins = kr.download('AAPL')

Unlike `KeyRatiosDownloader`, which returns a list of frames, `kr_fins` here holds a dictionary containing the financials for the morningstar ticker [`AAPL`](http://financials.morningstar.com/ratios/r.html?t=AAPL&region=usa&culture=en-US). The financials **may differ** from company to company.

print (kr_fins.keys())

Output:

dict_keys(['income_statement', 'balance_sheet', 'cash_flow', 'period_range', 'fiscal_year_end', 'currency'])



Every [`pandas.DataFrame`](http://pandas.pydata.org/pandas-docs/dev/generated/pandas.DataFrame.html) in the array `kr_frames` will be uploaded to a different database table. In our case the following tables will be created:

`morningstar_key_balance_sheet_items_in_percent`
Expand Down
98 changes: 56 additions & 42 deletions morningstar/good_morning.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,18 +33,19 @@
from bs4 import BeautifulSoup, Tag
from datetime import date


class KeyRatiosDownloader(object):
u"""Downloads key ratios from http://financials.morningstar.com/
"""

def __init__(self, table_prefix = u'morningstar_'):
def __init__(self, table_prefix=u'morningstar_'):
u"""Constructs the KeyRatiosDownloader instance.
:param table_prefix: Prefix of the MySQL tables.
"""
self._table_prefix = table_prefix

def download(self, ticker, conn = None):
def download(self, ticker, conn=None):
u"""Downloads and returns key ratios for the given Morningstar ticker.
Downloads and returns an array of pandas.DataFrames containing the key
Expand All @@ -56,10 +57,10 @@ def download(self, ticker, conn = None):
:param conn: MySQL connection.
:return: List of pandas.DataFrames containing the key ratios.
"""
url = (ur'http://financials.morningstar.com/ajax/exportKR2CSV.html?' +
ur'&callback=?&t={0}&region=usa&culture=en-US&cur=USD'.format(
url = (r'http://financials.morningstar.com/ajax/exportKR2CSV.html?' +
r'&callback=?&t={0}&region=usa&culture=en-US&cur=USD'.format(
ticker))
with urllib2.urlopen(url) as response:
with urllib.request.urlopen(url) as response:
tables = self._parse_tables(response)
response_structure = [
# Original Name, New pandas.DataFrame Name
Expand All @@ -77,7 +78,18 @@ def download(self, ticker, conn = None):
(u'Key Ratios -> Financial Health',
u'Key Liquidity/Financial Health'),
(u'Key Ratios -> Efficiency Ratios', u'Key Efficiency Ratios')]

frames = self._parse_frames(tables, response_structure)

if len(ticker) == 0:
raise ValueError("You did not enter a ticker symbol. Please"
" try again.")
elif frames == "MorningStar could not find the ticker":

raise ValueError("MorningStar cannot find the ticker symbol "
"you entered or it is INVALID. Please try "
"again.")

currency = re.match(u'^.* ([A-Z]+) Mil$',
frames[0].index[0]).group(1)
frames[0].index.name += u' ' + currency
Expand All @@ -96,7 +108,7 @@ def _parse_tables(response):
"""
# Regex pattern used to recognize csv lines containing financial data.
num_commas = 5
pat_commas = ur'(.*,){%d,}' % num_commas
pat_commas = r'(.*,){%d,}' % num_commas
# Resulting array of pairs (table_name, table_frame).
tables = []
table_name = None
Expand Down Expand Up @@ -126,9 +138,11 @@ def _parse_frames(tables, response_structure):
:param response_structure: List of pairs (expected table name, new name
assigned to the corresponding (processed) pandas.DataFrame).
"""
if len(tables) == 0:
return ("MorningStar could not find the ticker")
period_start = tables[0][1].ix[0][1]
period_month = pd.datetime.strptime(period_start, u'%Y-%m').month
#period_freq = pd.datetools.YearEnd(month=period_month)
# period_freq = pd.datetools.YearEnd(month=period_month)
period_freq = pd.tseries.offsets.YearEnd(month=period_month)
frames = []
for index, (check_name, frame_name) in enumerate(response_structure):
Expand Down Expand Up @@ -157,7 +171,7 @@ def _process_frame(frame, frame_name, period_start,
periods=len(output_frame.ix[0]),
freq=period_freq)
output_frame.columns.name = u'Period'
if re.match(ur'^\d{4}-\d{2}$', output_frame.ix[0][0]):
if re.match(r'^\d{4}-\d{2}$', output_frame.ix[0][0]):
output_frame.drop(output_frame.index[0], inplace=True)
output_frame.replace(u',', u'', regex=True, inplace=True)
output_frame.replace(u'^\s*$', u'NaN', regex=True, inplace=True)
Expand Down Expand Up @@ -188,8 +202,8 @@ def _get_db_name(name):
.replace(u'/', u' per ')
.replace(u'&', u' and ')
.replace(u'%', u' percent '))
name = re.sub(ur'[^a-z0-9]', u' ', name)
name = re.sub(ur'\s+', u' ', name).strip()
name = re.sub(r'[^a-z0-9]', u' ', name)
name = re.sub(r'\s+', u' ', name).strip()
return name.replace(u' ', u'_')

def _get_db_table_name(self, frame):
Expand Down Expand Up @@ -237,24 +251,24 @@ def _get_db_replace_values(self, ticker, frame):
u'REPLACE INTO `%s`\n' % self._get_db_table_name(frame) +
u' (%s)\nVALUES\n' % u',\n '.join(columns) +
u',\n'.join([u'("' + ticker + u'", "' + column.strftime(u'%Y-%m-%d') +
u'", ' +
u', '.join([u'NULL' if np.isnan(x) else u'%.5f' % x
for x in frame[column].values]) +
u')' for column in frame.columns]))
u'", ' +
u', '.join([u'NULL' if np.isnan(x) else u'%.5f' % x
for x in frame[column].values]) +
u')' for column in frame.columns]))


class FinancialsDownloader(object):
u"""Downloads financials from http://financials.morningstar.com/
"""

def __init__(self, table_prefix = u'morningstar_'):
def __init__(self, table_prefix=u'morningstar_'):
u"""Constructs the FinancialsDownloader instance.
:param table_prefix: Prefix of the MySQL tables.
"""
self._table_prefix = table_prefix

def download(self, ticker, conn = None):
def download(self, ticker, conn=None):
u"""Downloads and returns a dictionary containing pandas.DataFrames
representing the financials (i.e. income statement, balance sheet,
cash flow) for the given Morningstar ticker. If the MySQL connection
Expand All @@ -268,9 +282,9 @@ def download(self, ticker, conn = None):
"""
result = {}
for report_type, table_name in [
(u'is', u'income_statement'),
(u'bs', u'balance_sheet'),
(u'cf', u'cash_flow')]:
(u'is', u'income_statement'),
(u'bs', u'balance_sheet'),
(u'cf', u'cash_flow')]:
frame = self._download(ticker, report_type)
result[table_name] = frame
if conn:
Expand All @@ -292,15 +306,15 @@ def _download(self, ticker, report_type):
:return pandas.DataFrame corresponding to the given Morningstar ticker
and the given type of the report.
"""
url = (ur'http://financials.morningstar.com/ajax/' +
ur'ReportProcess4HtmlAjax.html?&t=' + ticker +
ur'&region=usa&culture=en-US&cur=USD' +
ur'&reportType=' + report_type + ur'&period=12' +
ur'&dataType=A&order=asc&columnYear=5&rounding=3&view=raw')
with urllib2.urlopen(url) as response:
url = (r'http://financials.morningstar.com/ajax/' +
r'ReportProcess4HtmlAjax.html?&t=' + ticker +
r'&region=usa&culture=en-US&cur=USD' +
r'&reportType=' + report_type + r'&period=12' +
r'&dataType=A&order=asc&columnYear=5&rounding=3&view=raw')
with urllib.request.urlopen(url) as response:
json_text = response.read().decode(u'utf-8')
json_data = json.loads(json_text)
result_soup = BeautifulSoup(json_data[u'result'],u'html.parser')
result_soup = BeautifulSoup(json_data[u'result'], u'html.parser')
return self._parse(result_soup)

def _parse(self, soup):
Expand All @@ -321,7 +335,7 @@ def _parse(self, soup):
self._period_range = pd.period_range(
year.div.text, periods=len(self._year_ids),
# freq=pd.datetools.YearEnd(month=period_month))
freq = pd.tseries.offsets.YearEnd(month=period_month))
freq=pd.tseries.offsets.YearEnd(month=period_month))
unit = left.find(u'div', {u'id': u'unitsAndFiscalYear'})
self._fiscal_year_end = int(unit.attrs[u'fyenumber'])
self._currency = unit.attrs[u'currency']
Expand All @@ -334,7 +348,7 @@ def _parse(self, soup):
columns=[u'parent_index', u'title'] + list(
self._period_range))

def _read_labels(self, root_node, parent_label_index = None):
def _read_labels(self, root_node, parent_label_index=None):
u"""Recursively reads labels from the parsed HTML response.
"""
for node in root_node:
Expand All @@ -344,7 +358,7 @@ def _read_labels(self, root_node, parent_label_index = None):
node.attrs[u'id'].startswith(u'label') and
not node.attrs[u'id'].endswith(u'padding') and
(not node.has_attr(u'style') or
u'display:none' not in node.attrs[u'style'])):
u'display:none' not in node.attrs[u'style'])):
label_id = node.attrs[u'id'][6:]
label_title = (node.div.attrs[u'title']
if node.div.has_attr(u'title')
Expand All @@ -353,8 +367,8 @@ def _read_labels(self, root_node, parent_label_index = None):
u'id': label_id,
u'index': self._label_index,
u'parent_index': (parent_label_index
if parent_label_index is not None
else self._label_index),
if parent_label_index is not None
else self._label_index),
u'title': label_title})
self._label_index += 1

Expand All @@ -368,14 +382,14 @@ def _read_data(self, root_node):
node.attrs[u'id'].startswith(u'data') and
not node.attrs[u'id'].endswith(u'padding') and
(not node.has_attr(u'style') or
u'display:none' not in node.attrs[u'style'])):
u'display:none' not in node.attrs[u'style'])):
data_id = node.attrs[u'id'][5:]
while (self._data_index < len(self._data) and
self._data[self._data_index][u'id'] != data_id):
self._data[self._data_index][u'id'] != data_id):
# In some cases we do not have data for all labels.
self._data_index += 1
assert(self._data_index < len(self._data) and
self._data[self._data_index][u'id'] == data_id)
assert (self._data_index < len(self._data) and
self._data[self._data_index][u'id'] == data_id)
for (i, child) in enumerate(node.children):
try:
value = float(child.attrs[u'rawvalue'])
Expand Down Expand Up @@ -466,13 +480,13 @@ def _get_db_replace_values(ticker, frame,
u'REPLACE INTO `%s`\n' % table_name +
u' (%s)\nVALUES\n' % u', '.join(columns) +
u',\n'.join([u'("' + ticker + u'", %d, %d, "%s", ' %
(index, frame.ix[index, u'parent_index'],
frame.ix[index, u'title']) +
u', '.join(
[u'NULL' if np.isnan(frame.ix[index, period])
else u'%.5f' % frame.ix[index, period]
for period in frame.columns[2:]]) + u')'
for index in frame.index]))
(index, frame.ix[index, u'parent_index'],
frame.ix[index, u'title']) +
u', '.join(
[u'NULL' if np.isnan(frame.ix[index, period])
else u'%.5f' % frame.ix[index, period]
for period in frame.columns[2:]]) + u')'
for index in frame.index]))


def _db_table_exists(table_name, conn):
Expand Down
42 changes: 42 additions & 0 deletions tests/test_download.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-


from unittest import TestCase

from morningstar import good_morning as gm
from morningstar.good_morning import KeyRatiosDownloader

class TestDownloadReturns(TestCase):
    """Tests for ``KeyRatiosDownloader.download`` input validation.

    NOTE(review): these tests hit financials.morningstar.com over the
    network, so they require connectivity and a live endpoint.
    """

    def test_downloadreturn(self):
        """A valid ticker returns the expected 11 key-ratio frames."""
        kr = gm.KeyRatiosDownloader()
        frames = kr.download('aapl')
        # msg is displayed only on failure, so phrase it as the failure.
        self.assertEqual(len(frames), 11,
                         "Download of a valid ticker did not return "
                         "11 key-ratio frames")

    def test_download_fail_empty(self):
        """An empty ticker string raises ValueError with a clear message."""
        kr = gm.KeyRatiosDownloader()
        exp = "You did not enter a ticker symbol. Please try again."
        # download() raises ValueError for bad input; assert the narrow
        # exception type rather than any Exception.
        with self.assertRaises(ValueError) as context:
            kr.download('')
        self.assertEqual(exp, str(context.exception),
                         "Empty ticker did not raise the expected "
                         "ValueError message")

    def test_download_fail_invalid(self):
        """An unknown ticker symbol raises ValueError with a clear message."""
        kr = gm.KeyRatiosDownloader()
        exp = ("MorningStar cannot find the ticker symbol you entered "
               "or it is INVALID. Please try again.")
        with self.assertRaises(ValueError) as context:
            kr.download('nothing')
        self.assertEqual(exp, str(context.exception),
                         "Invalid ticker did not raise the expected "
                         "ValueError message")



Loading

0 comments on commit 3df0634

Please sign in to comment.