-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathchap11.qmd
147 lines (126 loc) · 3.21 KB
/
chap11.qmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
---
title: "必知必会第11课:使用子查询"
author: "黄天元"
format: html
editor: visual
---
## 数据库的连接
```{r}
library(pacman)
p_load(tidyverse,DBI,RSQLite)
mydb = dbConnect(RSQLite::SQLite(), "data/TYSQL.sqlite")
```
## 基于SQL的数据操作
```{r}
# 11.2
dbGetQuery(mydb,"SELECT order_num
FROM OrderItems
WHERE prod_id = 'RGAN01';")
dbGetQuery(mydb,"SELECT cust_id
FROM Orders
WHERE order_num IN (20007,20008);")
dbGetQuery(mydb,"SELECT cust_id
FROM Orders
WHERE order_num IN (SELECT order_num
FROM OrderItems
WHERE prod_id = 'RGAN01');")
dbGetQuery(mydb,"SELECT cust_name, cust_contact
FROM Customers
WHERE cust_id IN (1000000004,1000000005);")
dbGetQuery(mydb,"SELECT cust_name, cust_contact
FROM Customers
WHERE cust_id IN (SELECT cust_id
FROM Orders
WHERE order_num IN (SELECT order_num
FROM OrderItems
WHERE prod_id = 'RGAN01'));")
# 11.3
dbGetQuery(mydb,"SELECT COUNT(*) AS orders
FROM Orders
WHERE cust_id = 1000000001;")
dbGetQuery(mydb,"SELECT cust_name,
cust_state,
(SELECT COUNT(*)
FROM Orders
WHERE Orders.cust_id = Customers.cust_id) AS orders
FROM Customers
ORDER BY cust_name;")
dbGetQuery(mydb,"SELECT cust_name,
cust_state,
(SELECT COUNT(*)
FROM Orders
WHERE cust_id = cust_id) AS orders
FROM Customers
ORDER BY cust_name;")
```
## 基于tidyverse的数据操作
```{r}
# 11.2
tbl(mydb,"OrderItems") %>%
filter(prod_id == 'RGAN01') %>%
pull(order_num) -> query1
tbl(mydb,"Orders") %>%
filter(order_num %in% query1) %>%
pull(cust_id) -> query2
tbl(mydb,"Customers") %>%
filter(cust_id %in% query2) %>%
select(cust_name, cust_contact)
# 11.3
tbl(mydb,"Orders") %>%
count(cust_id,name = "orders") -> query4
tbl(mydb,"Customers") %>%
select(cust_name,cust_state,cust_id) %>%
left_join(query4) %>%
arrange(cust_name)
```
## 练习
- 请使用SQL和R两种方式,解决课后的挑战题。
```{r}
#| echo: false
#| eval: false
## SQL
# 参考:https://forta.com/books/0135182794/challenges/
## R
# 1
tbl(mydb,"OrderItems") %>%
filter(item_price >= 10) %>%
pull(order_num) -> query1
tbl(mydb,"Orders") %>%
filter(order_num %in% query1) %>%
select(cust_id)
# 2
tbl(mydb,"OrderItems") %>%
filter(prod_id == 'BR01') %>%
pull(order_num) -> query1
tbl(mydb,"Orders") %>%
filter(order_num %in% query1) %>%
select(cust_id, order_date) %>%
arrange(order_date)
# 3
tbl(mydb,"OrderItems") %>%
filter(prod_id == 'BR01') %>%
pull(order_num) -> query1
tbl(mydb,"Orders") %>%
filter(order_num %in% query1) %>%
pull(cust_id) -> query2
tbl(mydb,"Customers") %>%
filter(cust_id %in% query2) %>%
select(cust_email)
# 4
tbl(mydb,"OrderItems") %>%
group_by(order_num) %>%
summarise(total_ordered = sum(item_price*quantity,na.rm = T)) -> query1
tbl(mydb,"Orders") %>%
select(cust_id,order_num) %>%
inner_join(query1) %>%
select(-order_num) %>%
arrange(desc(total_ordered))
# 5
tbl(mydb,"OrderItems") %>%
group_by(prod_id) %>%
summarise(quant_sold = sum(quantity,na.rm = T)) -> query1
tbl(mydb,"Products") %>%
select(prod_name,prod_id) %>%
left_join(query1) %>%
select(-prod_id)
```