-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy path#low performance of setkey #4788.Rmd
142 lines (120 loc) · 4.37 KB
/
#low performance of setkey #4788.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
---
title: 'Low performance of setkey function for version 1.12.0 and later #4788'
author: "Doris Amoakohene"
date: "`r Sys.Date()`"
output: html_document
---
Issue: <https://github.com/Rdatatable/data.table/issues/4788>

Related issue comment: <https://github.com/Rdatatable/data.table/issues/4788#issuecomment-719669227>

```{r}
library(atime)
library(data.table)
library(ggplot2)
library(git2r)
```
```{r}
# Clone the data.table repository into a freshly created temporary directory;
# `tdir` is later passed as pkg.path to atime::atime_versions().
tdir <- tempfile()
dir.create(tdir)
git2r::clone(
  url = "https://github.com/Rdatatable/data.table",
  local_path = tdir)
```
```{r}
# Time setkey() on a character column for two data.table commits, with the
# requested data sizes N = 10^3, ..., 10^8.
atime.list <- atime::atime_versions(
  pkg.path = tdir,
  # Edits a copy of the package sources (under new.pkg.path) so that the
  # package name appearing in the build files is replaced by a name that
  # carries the commit SHA.
  pkg.edit.fun = function(old.Package, new.Package, sha, new.pkg.path) {
    # Apply one find/replace to the files matching `rel.glob` in the copy.
    edit_in_copy <- function(rel.glob, pattern, replacement) {
      target <- file.path(new.pkg.path, rel.glob)
      atime::glob_find_replace(target, pattern, replacement)
    }
    # Regex for the old name in which each "." becomes an optional "_".
    name.regex <- gsub(".", "_?", old.Package, fixed = TRUE)
    # Old name with "." replaced by "_", followed by "_" and the SHA.
    sha.name <- paste0(gsub(".", "_", old.Package, fixed = TRUE), "_", sha)
    edit_in_copy(
      "DESCRIPTION",
      paste0("Package:\\s+", old.Package),
      paste("Package:", new.Package))
    edit_in_copy(
      file.path("src", "Makevars.*in"),
      name.regex,
      sha.name)
    # onLoad.R is edited twice, in this order: first the underscore form of
    # the name, then the packageVersion("...") call.
    edit_in_copy(
      file.path("R", "onLoad.R"),
      name.regex,
      sha.name)
    edit_in_copy(
      file.path("R", "onLoad.R"),
      sprintf('packageVersion\\("%s"\\)', old.Package),
      sprintf('packageVersion\\("%s"\\)', new.Package))
    edit_in_copy(
      file.path("src", "init.c"),
      paste0("R_init_", name.regex),
      paste0("R_init_", gsub("[.]", "_", sha.name)))
    edit_in_copy(
      "NAMESPACE",
      sprintf('useDynLib\\("?%s"?', name.regex),
      paste0('useDynLib(', sha.name))
  },
  N = 10^(3:8),
  setup = {
    # N distinct integers in random order, stored as strings, plus a
    # uniform random numeric column of the same length.
    set.seed(1L)
    dt <- data.table(
      x = as.character(sample(N, N, FALSE)),
      y = runif(N))
  },
  expr = data.table:::setkey(dt, x, verbose = TRUE),
  # https://github.com/Rdatatable/data.table/commit/a4e26b50beaf0bb2aac40bbf47f9d1745579154a
  Before = "a4e26b50beaf0bb2aac40bbf47f9d1745579154a",
  # https://github.com/Rdatatable/data.table/commit/34796cd1524828df9bf13a174265cb68a09fcd77
  Regression = "34796cd1524828df9bf13a174265cb68a09fcd77")
# Not currently benchmarked:
# "Fixed"="63632e6f55f1f5289c689edab37f6a69d2df25cf"
# https://github.com/Rdatatable/data.table/commit/63632e6f55f1f5289c689edab37f6a69d2df25cf
```
```{r}
# Draw the timing results collected in atime.list by the benchmark above.
plot(atime.list)
```
```{r}
# Run the three-commit benchmark (Before / Regression / Fixed) through reprex
# and keep the result in `ret`.
# Fixes relative to the previous version of this chunk:
#   * the reprex({ ... }) call is now closed, so the chunk parses;
#   * reprex is called as reprex::reprex() because the package is not
#     attached by the library() calls in this file;
#   * the inner setkey() is qualified with data.table::: like the outer call,
#     so that it is also subject to atime's per-version substitution.
# NOTE(review): reprex presumably evaluates its input in a fresh R session,
# in which `tdir` would not exist -- confirm before relying on `ret`.
ret <- reprex::reprex({
  atime.list <- atime::atime_versions(
    pkg.path = tdir,
    # Edits a copy of the package sources (under new.pkg.path) so that the
    # package name appearing in the build files is replaced by a name that
    # carries the commit SHA.
    pkg.edit.fun = function(old.Package, new.Package, sha, new.pkg.path) {
      # Apply one find/replace to the files matching `glob` in the copy.
      pkg_find_replace <- function(glob, FIND, REPLACE) {
        atime::glob_find_replace(file.path(new.pkg.path, glob), FIND, REPLACE)
      }
      # Regex for the old name in which each "." becomes an optional "_".
      Package_regex <- gsub(".", "_?", old.Package, fixed = TRUE)
      # Old name with "." replaced by "_".
      Package_ <- gsub(".", "_", old.Package, fixed = TRUE)
      # Underscore name suffixed with the commit SHA.
      new.Package_ <- paste0(Package_, "_", sha)
      pkg_find_replace(
        "DESCRIPTION",
        paste0("Package:\\s+", old.Package),
        paste("Package:", new.Package))
      pkg_find_replace(
        file.path("src", "Makevars.*in"),
        Package_regex,
        new.Package_)
      pkg_find_replace(
        file.path("R", "onLoad.R"),
        Package_regex,
        new.Package_)
      pkg_find_replace(
        file.path("R", "onLoad.R"),
        sprintf('packageVersion\\("%s"\\)', old.Package),
        sprintf('packageVersion\\("%s"\\)', new.Package))
      pkg_find_replace(
        file.path("src", "init.c"),
        paste0("R_init_", Package_regex),
        paste0("R_init_", gsub("[.]", "_", new.Package_)))
      pkg_find_replace(
        "NAMESPACE",
        sprintf('useDynLib\\("?%s"?', Package_regex),
        paste0('useDynLib(', new.Package_))
    },
    N = 10^seq(3, 8),
    setup = {
      # NOTE(review): x always has 5e6 rows regardless of N; only the length
      # of y (N / 100, recycled by data.table()) depends on N -- confirm this
      # is the intended scaling for the benchmark.
      n <- N / 100
      set.seed(1L)
      dt <- data.table(
        x = as.character(sample(5e6L, 5e6L, FALSE)),
        y = runif(n))
    },
    expr = data.table:::`[.data.table`(
      data.table:::setkey(dt, x, verbose = TRUE)),
    # https://github.com/Rdatatable/data.table/commit/76bb569fd7736b5f89471a35357e6a971ae1d424
    "Before" = "76bb569fd7736b5f89471a35357e6a971ae1d424",
    # https://github.com/Rdatatable/data.table/commit/34796cd1524828df9bf13a174265cb68a09fcd77
    "Regression" = "34796cd1524828df9bf13a174265cb68a09fcd77",
    # https://github.com/Rdatatable/data.table/commit/63632e6f55f1f5289c689edab37f6a69d2df25cf
    "Fixed" = "63632e6f55f1f5289c689edab37f6a69d2df25cf")
})
```
```{r}
```