-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathProductFeed.py
175 lines (133 loc) · 5.33 KB
/
ProductFeed.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
import os
from pathlib import Path

from utiltools import *
class ProductFeeds(UtilTools):
    """Download the product feeds, normalise them and append them to one CSV.

    Downloading and parsing are delegated to helpers that this class calls
    but does not define (``download_file``, ``csv_reader``, ``json_reader``,
    ``xml_reader`` and ``BUCKET``); they are expected to come from
    ``UtilTools``.
    """

    # Header written once, when the output file is first created.
    _HEADER = 'Id, Name, Brand, Retailer, Price, InStock, Source \n'

    # File extension -> name of the reader method that parses that format.
    _READERS = {
        'csv': 'csv_reader',
        'json': 'json_reader',
        'xml': 'xml_reader',
    }

    def __init__(self):
        """Register the feed URLs, then run the ``UtilTools`` initialiser."""
        self.files = (
            'https://s3-eu-west-1.amazonaws.com/pricesearcher-code-tests/python-software-developer/products.csv.gz',
            'https://s3-eu-west-1.amazonaws.com/pricesearcher-code-tests/python-software-developer/products.json',
            'https://s3-eu-west-1.amazonaws.com/pricesearcher-code-tests/python-software-developer/products.xml.zip',
        )
        super().__init__()

    def the_writer(self, data, FILE='final_product_feed.csv'):
        """Append ``data`` to the final product feed CSV.

        The header line is written only when ``FILE`` does not exist yet.
        Each row is written as its values joined by ``', '``, in the row's
        own key order (the header is not used to reorder columns).

        Args:
            data: iterable of dicts, one per product; ``None`` or an empty
                iterable writes no rows.
            FILE: output path, defaults to ``final_product_feed.csv``.
        """
        needs_header = not Path(FILE).is_file()
        with open(FILE, 'a', encoding='utf-8') as f:
            if needs_header:
                f.write(self._HEADER)
            for row in data or ():
                f.write(self._format_row(row))

    @staticmethod
    def _format_row(row):
        """Return one output line for ``row``; ``None`` becomes an empty field."""
        # str() every value: ids and prices may arrive as numbers, and
        # str.join raises TypeError on anything that is not a string.
        fields = ('' if value is None else str(value) for value in row.values())
        return ', '.join(fields) + '\n'

    @staticmethod
    def _detect_source(data):
        """Return the ``'source'`` tag of the first row that has one, else None."""
        for row in data or ():
            try:
                return row['source']
            except (KeyError, TypeError, IndexError):
                continue
        return None

    def transformer(self, data):
        """Normalise price and stock fields according to the feed's source.

        The source is read from the ``'source'`` key of the rows. json and
        csv rows are updated in place and ``data`` itself is returned; xml
        rows are copied into new dicts with a fixed key order.

        Args:
            data: a sequence of dicts holding one product feed.

        Returns:
            The transformed rows, or an empty list when ``data`` is empty or
            its source is not one of ``json``, ``xml`` or ``csv``.
        """
        source = self._detect_source(data)

        if source == 'json':
            for row in data:
                # A missing price stays blank rather than becoming '0.00'.
                row['price'] = self.fix_price(row.get('price'))
                row['in_stock'] = self.fix_stock(row.get('in_stock', 0))
                if row.get('brand') is None:
                    row['brand'] = ' '
                if row.get('retailer') is None:
                    row['retailer'] = ' '
            return data

        if source == 'xml':
            return [
                {
                    'id': row['id'],
                    'name': row['name'],
                    'brand': row['brand'],
                    'retailer': row['retailer'],
                    'latest_price': self.fix_price(row['latest_price']),
                    'available': self.fix_stock(row['available']),
                    'source': 'xml',
                }
                for row in data
            ]

        if source == 'csv':
            for row in data:
                # Normalise before stringifying so that None/bool/numeric
                # values are interpreted as such, not as 'None'/'True' text.
                row['Price'] = self.fix_price(row['Price'])
                row['InStock'] = self.fix_stock(row['InStock'])
                for col, value in list(row.items()):
                    if not isinstance(value, str):
                        row[col] = '' if value is None else str(value)
            return data

        return []

    @staticmethod
    def fix_price(val):
        """Return ``val`` as a price string with at least two decimal places.

        Args:
            val: the raw price. Numbers are formatted to exactly two
                decimals. Strings are padded with zeros up to two decimals
                (longer fractions are left untouched); the empty string
                becomes ``'0.00'``.

        Returns:
            The price as a string, or ``''`` for booleans, ``None`` and any
            other unsupported type.
        """
        # bool is a subclass of int, so it must be rejected before numbers.
        if val is None or isinstance(val, bool):
            return ''
        if isinstance(val, (int, float)):
            return format(val, '.2f')
        if not isinstance(val, str):
            return ''
        if val == '':
            return '0.00'

        _, dot, fraction = val.partition('.')
        if not dot:
            return val + '.00'
        # Pad '12.' -> '12.00' and '12.5' -> '12.50'; leave '12.55' alone.
        return val + '0' * max(0, 2 - len(fraction))

    @staticmethod
    def fix_stock(val):
        """Return ``'1'`` if ``val`` says the item is in stock, ``'0'`` if not.

        Args:
            val: the raw stock indicator. Strings are judged by their first
                non-blank character (``y``/``t`` -> in stock, ``n``/``f`` ->
                out of stock, case-insensitive); booleans and the numbers
                1 and 0 map to ``'1'`` and ``'0'``.

        Returns:
            ``'1'``, ``'0'``, or ``''`` when the value cannot be interpreted
            (empty string, ``None``, other numbers or types).
        """
        if isinstance(val, str):
            # Slicing instead of indexing keeps the empty string safe.
            flag = val.strip()[:1].lower()
            if flag in ('y', 't'):
                return '1'
            if flag in ('n', 'f'):
                return '0'
            return ''
        if isinstance(val, (bool, int, float)):
            if val == 1:
                return '1'
            if val == 0:
                return '0'
        return ''

    def run(self):
        """Download every feed, transform it and append it to the output CSV.

        1 - download each URL in ``self.files``
        2 - parse each downloaded file with the reader for its extension
        3 - apply the product feed transformations
        4 - append the rows to the final product file
        """
        file_list = []
        for url in self.files:
            print(url)
            self.download_file(url)
            file_list.append(self.BUCKET + os.path.basename(url))

        for file in file_list:
            # NOTE(review): only the final extension is examined, so the
            # default '.csv.gz' and '.xml.zip' feeds ('gz' / 'zip') match no
            # reader and are skipped. Whether the readers expect compressed
            # or extracted paths depends on UtilTools - confirm before
            # widening this check.
            extension = os.path.splitext(file)[1][1:]
            reader_name = self._READERS.get(extension)
            if reader_name is None:
                continue
            reader = getattr(self, reader_name)
            self.the_writer(self.transformer(reader(file)))