-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcreate_and_load_db.py
56 lines (47 loc) · 1.75 KB
/
create_and_load_db.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
import duckdb
con = duckdb.connect("AIRBNB.db")
sql_stmt = """
CREATE SCHEMA RAW;
USE RAW;
CREATE OR REPLACE TABLE raw_listings
(id integer,
listing_url string,
name string,
room_type string,
minimum_nights integer,
host_id integer,
price string,
created_at datetime,
updated_at datetime);
COPY raw_listings (id,
listing_url,
name,
room_type,
minimum_nights,
host_id,
price,
created_at,
updated_at)
from 's3://dbtlearn/listings.csv'
WITH (HEADER true);
CREATE OR REPLACE TABLE raw_reviews
(listing_id integer,
date datetime,
reviewer_name string,
comments string,
sentiment string);
COPY raw_reviews (listing_id, date, reviewer_name, comments, sentiment)
from 's3://dbtlearn/reviews.csv'
WITH (HEADER true);
CREATE OR REPLACE TABLE raw_hosts
(id integer,
name string,
is_superhost string,
created_at datetime,
updated_at datetime);
COPY raw_hosts (id, name, is_superhost, created_at, updated_at)
from 's3://dbtlearn/hosts.csv'
WITH (HEADER true);
"""
con.sql(sql_stmt)
con.close()