-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathsec.py
78 lines (61 loc) · 2.77 KB
/
sec.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
import logging
from sec_api import ExtractorApi, QueryApi, XbrlApi
MAX_TOKENS_CNT = 3000
class SEC:
def __init__(self, my_api_key):
self.queryApi = QueryApi(api_key=my_api_key)
self.extractorApi = ExtractorApi(api_key=my_api_key)
self.xbrlApi = XbrlApi(api_key=my_api_key)
def get_10K_urls(self, stock: str, start_date: str, end_date: str):
query = {
"query": { "query_string": {
"query": "ticker:" + stock +
" AND filedAt:{" + start_date + " TO " + end_date + "}" +
" AND formType:\"10-K\"" +
" AND NOT formType:\"NT 10-K\"" +
" AND NOT formType:\"10-K/A\""
} },
"from": "0",
"size": "10",
"sort": [{ "filedAt": { "order": "desc" } }]
}
response = self.queryApi.get_filings(query)
# no more filings in search universe
if not response["filings"]:
logging.warning("No filings found for the given query.")
return []
return [filing["linkToFilingDetails"] for filing in response["filings"]]
def __get_text_from_10K(self, filing_url: str, section: str):
return self.extractorApi.get_section(filing_url, section, "text")
# Financial Statements and Supplementary Data
def get_section_8(self, filing_url: str):
return self.__get_text_from_10K(filing_url, "8")
# Item 7. Management's Discussion and Analysis of Financial Condition and Results of Operations
def get_section_7(self, filing_url: str):
return self.__get_text_from_10K(filing_url, "7")
def access_financial_statements(self, filing_url: str):
return self.xbrlApi.xbrl_to_json(htm_url=filing_url)
def section_7_to_list(self, filing_url: str):
text = self.get_section_7(filing_url)
token_lst = []
tokens = ""
token_count = 0
max_tokens = MAX_TOKENS_CNT
for line in text.splitlines():
line_token_count = len(line.split(" "))
# skip line if it has less than 20 tokens
if line_token_count < 20:
continue
# add line to current tokens if it does not exceed the maximum token count
if token_count + line_token_count <= max_tokens:
tokens += line
token_count += line_token_count
# if it exceeds the maximum token count,
# add the current tokens to the list and reset the token count and tokens
else:
token_lst.append(tokens)
tokens = line
token_count = line_token_count
# add the final set of tokens to the list
token_lst.append(tokens)
return token_lst