forked from hhenoida/dataanalytics
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy path1b4-importfiles.R
124 lines (97 loc) · 3.3 KB
/
1b4-importfiles.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
# Import / Export Data ----

# Check which files and folders are present
dir("./data2")
list.files("./data2")
list.files("./data")
file.exists("./data/mtcars.csv")

# Read a flat text file into a character vector
list.files("./data2")
data <- scan("./data2/hhe.txt", what = "character")
head(data)
class(data)
# CSV: write a CSV file, then read it back in several ways ----

# First create a CSV file from the built-in iris data set
head(iris)
?iris
# Create the target folder if needed so write.csv() does not fail on a
# missing directory
if (!dir.exists("./data")) {
  dir.create("./data")
}
# Spell out FALSE: the shorthand F is an ordinary variable that can be reassigned
write.csv(iris, "./data/iris.csv", row.names = FALSE)
# Go to the data folder and check that iris.csv is there

# read.csv(): header and sep are passed explicitly here
read1 <- read.csv(file = "./data/iris.csv", header = TRUE, sep = ",")
str(read1)
class(read1)
head(read1)

# read.table(): same file, separator given explicitly
read2 <- read.table(file = "./data/iris.csv", header = TRUE, sep = ",")
str(read2)
class(read2)
head(read2)

# read.delim(): same file, separator given explicitly
read3 <- read.delim(file = "./data/iris.csv", header = TRUE, sep = ",")
str(read3)
class(read3)
head(read3)

# If the file lives outside the project folders, or you want to browse for it,
# pick it with file.choose(). That needs a user at the console, so the step is
# skipped when the script runs non-interactively.
if (interactive()) {
  read4 <- read.csv(file = file.choose())
  str(read4)
  head(read4)
}
# Read a data file straight from a web URL with read.csv()
# NOTE(review): the .dat extension suggests this file may not be
# comma-separated with a header row -- confirm read.csv() is the right reader.
read_web1 <- read.csv("http://www.stats.ox.ac.uk/pub/datasets/csb/ch11b.dat")
head(read_web1)

# Same idea using fread() from the data.table package
library(data.table)
read_web2 <- fread("http://www-bcf.usc.edu/~gareth/ISL/Advertising.csv")
head(read_web2)
class(read_web2)

# Text file from the web
read_txt <- read.table(
  "https://s3.amazonaws.com/assets.datacamp.com/blog_assets/test.txt",
  header = FALSE
)
head(read_txt)
# The difference between the readers is the delimiter you specify
# (read.csv uses a comma by default)
# ----------------------
# Read an HTML table from a web page with the XML package
library(XML)
chess <- "http://ratings.fide.com/top.phtml?list=men"
# chess <- "http://ratings.fide.com/top.phtml?list=women"

# TRUE/FALSE are written out in full: T and F are plain variables that can be
# reassigned, which would silently change these arguments.
# which = 5 asks for the fifth table found in the page.
chess.table <- readHTMLTable(
  chess,
  header = TRUE,
  which = 5,
  stringsAsFactors = FALSE
)
head(chess.table)
# ---------
# Exchange data with other statistical packages via haven
library(haven)

# SAS: write mtcars out, then read it back
# NOTE(review): newer haven releases reportedly deprecate write_sas() --
# confirm against the installed haven version.
write_sas(mtcars, "./data2/mtcars.sas7bdat")
df_sas <- read_sas("./data2/mtcars.sas7bdat")
head(df_sas)

# SPSS: write the women data set out, then read it back
write_sav(women, "./data2/women.sav")
df_spss <- read_sav("./data2/women.sav")
head(df_spss)

# Other software: the foreign package
library(foreign)
# Read the same SPSS file with foreign; the result is converted with
# as.data.frame() for display, and class() shows what read.spss() returned
df_spss2 <- read.spss("./data2/women.sav")
head(as.data.frame(df_spss2))
class(df_spss2)
# ---------------------------------------
# Read tables from web pages with rvest / xml2
library("rvest")
library("xml2")

# One table
url <- xml2::read_html(
  "http://www.worldatlas.com/articles/largest-cities-in-europe-by-population.html"
)
# Check whether the page contains any <table> nodes
(tbls <- rvest::html_nodes(url, "table"))
tbls_read <- url %>%
  html_nodes("table") %>%
  html_table(fill = TRUE)
tbls_read

# Multiple tables on one page
url2 <- read_html("https://tradingeconomics.com/india/indicators")
# Check whether the page contains any <table> nodes
(tbls <- rvest::html_nodes(url2, "table"))
tbls2_read <- url2 %>%
  html_nodes("table") %>%
  html_table(fill = TRUE)
tbls2_read
tbls2_read[1]
# ---------------------------------------
# Import from a Google Sheet
library(gsheet)
url_gsheet <- "https://docs.google.com/spreadsheets/d/1QogGSuEab5SZyZIw1Q8h-0yrBNs1Z_eEBJG7oRESW5k/edit#gid=107865534"
# gsheet2tbl() fetches the sheet; convert the result to a plain data frame
df_gsheet <- as.data.frame(gsheet2tbl(url_gsheet))
head(df_gsheet)
# Import from Excel (xlsx package, which runs on Java through rJava)

# JAVA_HOME has to be set BEFORE rJava/xlsx are loaded, because the JVM is
# located when rJava loads; setting it afterwards has no effect on that.
# The path below is machine-specific, so it is applied only when JAVA_HOME is
# not already set and the folder actually exists on this machine.
java_home <- "C:\\Program Files\\Java\\jre1.8.0_181"
if (!nzchar(Sys.getenv("JAVA_HOME")) && dir.exists(java_home)) {
  Sys.setenv(JAVA_HOME = java_home)
}
library(rJava)
library(xlsx)

# Read the first worksheet from the workbook myexcel.xlsx
# (first row contains variable names)
df_excel1 <- read.xlsx("./data2/myexcel.xlsx", sheetIndex = 1)
head(df_excel1)

# Read the worksheet named "bowlers"
df_excel2a <- read.xlsx("./data2/myexcel.xlsx", sheetName = "bowlers")
head(df_excel2a)

# Read the second worksheet by position
df_excel2b <- read.xlsx("./data2/myexcel.xlsx", sheetIndex = 2)
head(df_excel2b)