-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathsql_project.py
124 lines (94 loc) · 3.67 KB
/
sql_project.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
# -*- coding: utf-8 -*-
"""SQL-Project.ipynb
Automatically generated by Colab.
Original file is located at
https://colab.research.google.com/drive/1aAlkn4hnDv387DITDerQslfBiTxlGu3d
#Project Overview:
Step into the role of a data analyst for the Chinook online music store! This project highlights my SQL querying ability, uncovering insights from a rich music database. Using SQL techniques like joins, aggregate functions, and grouping, I analyzed sales data to answer critical business questions and visualized key trends to inform strategic decisions.
# import
"""
import sqlite3
import pandas as pd
import matplotlib.pyplot as plt
#create connection and cursor to the chinook database
conn = sqlite3.connect("/content/Chinook_Sqlite.sqlite")
curs = conn.cursor()
curs.execute("SELECT Name FROM sqlite_master WHERE type = 'table'")
tables = curs.fetchall()
tables
# execute qwereez 1: total revenue
curs.execute('''SELECT SUM(Total) FROM Invoice''')
total_revenue = curs.fetchone()[0]
print(f"Total revenue: ${total_revenue:.2f}")
"""# revenue by country, desc"""
curs.execute('''SELECT BillingCountry, SUM(Total) AS Revenue
FROM Invoice
GROUP BY BillingCountry
ORDER BY Revenue DESC''')
revenue_by_country = curs.fetchall()
# dataframe this
rev_by_country = pd.DataFrame(revenue_by_country, columns=['Country', 'Revenue'])
rev_by_country
"""# top 5 selling albums
"""
# top five selling albums
curs.execute('''SELECT Artist.Name, Album.Title, sum(InvoiceLine.Quantity) AS quantity_sold
FROM InvoiceLine
JOIN Track ON InvoiceLine.TrackId = Track.TrackId
JOIN Album ON Track.AlbumId = Album.AlbumId
JOIN Artist ON Album.ArtistId = Artist.ArtistId
GROUP BY Album.Title
ORDER BY quantity_sold DESC
LIMIT 5''')
top_five_selling_albums = curs.fetchall()
top_five_selling_albums
# data FRAME IT
top_five_selling_albums = pd.DataFrame(top_five_selling_albums, columns=['Artist Name', 'Album Title', 'Quantity Sold'])
top_five_selling_albums
"""# Top 5 selling artists"""
# Top five selling Artisits
curs.execute('''SELECT Artist.Name, sum(InvoiceLine.Quantity) AS amount_sold
FROM InvoiceLine
JOIN Track ON InvoiceLine.TrackId = Track.TrackId
JOIN Album ON Track.AlbumId = Album.AlbumId
JOIN Artist ON Album.ArtistId = Artist.ArtistId
GROUP BY Artist.Name
ORDER BY amount_sold DESC
LIMIT 5''')
top_five_selling_artists = curs.fetchall()
top_five_selling_artists
# FRAME IT!!
top_five_selling_artists = pd.DataFrame(top_five_selling_artists, columns=['Artist Name', 'Amount Sold'])
top_five_selling_artists
"""# Top five selling Tracks"""
# Top five selling Tracks
curs.execute('''SELECT Track.Name, sum(InvoiceLine.Quantity) AS amount_sold
FROM InvoiceLine
JOIN Track ON InvoiceLine.TrackId = Track.TrackId
GROUP BY Track.Name
ORDER BY amount_sold DESC
LIMIT 5''')
top_five_selling_tracks = curs.fetchall()
top_five_selling_tracks
# FRAME IT!!
top_five_selling_tracks = pd.DataFrame(top_five_selling_tracks, columns=['Track Name', 'Amount Sold'])
top_five_selling_tracks
"""# Ploty graph"""
# install ploty for ploty express
!pip install plotly
# import ploty express for visualizations
import plotly.express as px
# graph ploty with interactivity
fig = px.bar(top_five_selling_albums, x='Album Title', y='Quantity Sold', title='Top Five Selling Albums')
fig.show()
fig2 = px.bar(top_five_selling_artists, x='Artist Name', y='Amount Sold', title='Top Five Selling Artists')
fig2.show()
fig3 = px.bar(top_five_selling_tracks, x='Track Name', y='Amount Sold', title='Top Five Selling Tracks')
fig3.show()
# check README for Plotly graphs
print(type(top_five_selling_albums))
print(type(top_five_selling_artists))
print(type(top_five_selling_tracks))
"""#close out connection"""
# close the connection to the database
conn.close()