-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathjoin operation almost 2 times slower.Rmd
131 lines (118 loc) · 4.34 KB
/
join operation almost 2 times slower.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
---
title: 'join operation almost 2 times slower #3928'
author: "Doris Amoakohene"
date: "`r Sys.Date()`"
output: html_document
---
- Issue: <https://github.com/Rdatatable/data.table/issues/3928>
- Related comment on the issue: <https://github.com/Rdatatable/data.table/issues/3928#issuecomment-651408089>
- Related pull request: <https://github.com/Rdatatable/data.table/pull/3435>

```{r}
library(atime)
library(data.table)
library(ggplot2)
library(git2r)
```
```{r}
# Create a fresh temporary directory and clone the data.table repository
# into it; `tdir` is the package path handed to atime in the later chunks.
tdir <- tempfile()
dir.create(tdir)
git2r::clone(
  url = "https://github.com/Rdatatable/data.table",
  local_path = tdir)
```
```{r}
# Time the update-join from data.table issue #3928 (d2[d1, v1 := i.v1, on=...])
# for two commits of data.table, over data sizes N = 10^3 ... 10^8.
atime.list.3928 <- atime::atime_versions(
  pkg.path = tdir,
  # Rewrites the package name inside a checked-out copy of the package
  # (DESCRIPTION, Makevars, onLoad.R, init.c, NAMESPACE) to a name that
  # embeds the commit SHA -- presumably so several versions can be installed
  # side by side; confirm against the atime documentation.
  pkg.edit.fun = function(old.Package, new.Package, sha, new.pkg.path) {
    pkg_find_replace <- function(glob, FIND, REPLACE) {
      atime::glob_find_replace(file.path(new.pkg.path, glob), FIND, REPLACE)
    }
    # "data.table" -> "data_?table": matches both the dotted and the
    # underscore spelling used in C symbols.
    Package_regex <- gsub(".", "_?", old.Package, fixed = TRUE)
    Package_ <- gsub(".", "_", old.Package, fixed = TRUE)
    new.Package_ <- paste0(Package_, "_", sha)
    pkg_find_replace(
      "DESCRIPTION",
      paste0("Package:\\s+", old.Package),
      paste("Package:", new.Package))
    pkg_find_replace(
      file.path("src", "Makevars.*in"),
      Package_regex,
      new.Package_)
    pkg_find_replace(
      file.path("R", "onLoad.R"),
      Package_regex,
      new.Package_)
    pkg_find_replace(
      file.path("R", "onLoad.R"),
      sprintf('packageVersion\\("%s"\\)', old.Package),
      sprintf('packageVersion\\("%s"\\)', new.Package))
    pkg_find_replace(
      file.path("src", "init.c"),
      paste0("R_init_", Package_regex),
      paste0("R_init_", gsub("[.]", "_", new.Package_)))
    pkg_find_replace(
      "NAMESPACE",
      sprintf('useDynLib\\("?%s"?', Package_regex),
      paste0('useDynLib(', new.Package_))
  },
  N = 10^seq(3, 8),
  # Build two N-row tables whose id1/id2 columns are shuffled "id<k>" strings.
  setup = {
    set.seed(108)
    n2 <- 1:N
    n1 <- sample(n2, N)
    d1 <- data.table(
      id1 = paste0("id", sample(n1)),
      id2 = paste0("id", sample(n1)),
      v1 = n1)
    d2 <- data.table(
      id1 = paste0("id", sample(n2)),
      id2 = paste0("id", sample(n2)),
      v2 = n2)
  },
  # Equivalent to d2[d1, v1 := i.v1, on=c("id1","id2")]. The table being
  # updated (d2) must be the first argument of `[.data.table`; the previous
  # form `d2(d1, ...)` tried to call d2 as a function and failed.
  expr = data.table:::`[.data.table`(d2, d1, v1 := i.v1, on = c("id1", "id2")),
  # https://github.com/Rdatatable/data.table/commit/8b201fd28f5d4afcc4be026a5d9eb4bb6dd62955
  "Before" = "8b201fd28f5d4afcc4be026a5d9eb4bb6dd62955",
  # https://github.com/Rdatatable/data.table/commit/e0140eafa07ad39a7ac9d67737cc43c4c95ca935
  "Regression" = "e0140eafa07ad39a7ac9d67737cc43c4c95ca935")
# Not yet enabled -- candidate "Fixed" commit, taken from the last commits of
# https://github.com/Rdatatable/data.table/pull/5463/commits :
# "Fixed"="58409197426ced4714af842650b0cc3b9e2cb842"
```
```{r}
# Render the same benchmark as a self-contained reprex.
# reprex is namespace-qualified because the package is not attached above.
ret <- reprex::reprex({
  # reprex evaluates this code in a fresh R session, so objects from earlier
  # chunks (such as `tdir`) are not available; clone the repository here.
  tdir <- tempfile()
  dir.create(tdir)
  git2r::clone("https://github.com/Rdatatable/data.table", tdir)
  # Previously passed as a stray unnamed argument to atime_versions(), where
  # it was treated as a version argument instead of configuring threads.
  # NOTE(review): this sets the thread count of the installed data.table;
  # confirm whether the per-commit copies built by atime pick it up.
  data.table::setDTthreads(2L)
  atime.list <- atime::atime_versions(
    pkg.path = tdir,
    # Rewrites the package name inside a checked-out copy of the package
    # (DESCRIPTION, Makevars, onLoad.R, init.c, NAMESPACE) to a name that
    # embeds the commit SHA.
    pkg.edit.fun = function(old.Package, new.Package, sha, new.pkg.path) {
      pkg_find_replace <- function(glob, FIND, REPLACE) {
        atime::glob_find_replace(file.path(new.pkg.path, glob), FIND, REPLACE)
      }
      Package_regex <- gsub(".", "_?", old.Package, fixed = TRUE)
      Package_ <- gsub(".", "_", old.Package, fixed = TRUE)
      new.Package_ <- paste0(Package_, "_", sha)
      pkg_find_replace(
        "DESCRIPTION",
        paste0("Package:\\s+", old.Package),
        paste("Package:", new.Package))
      pkg_find_replace(
        file.path("src", "Makevars.*in"),
        Package_regex,
        new.Package_)
      pkg_find_replace(
        file.path("R", "onLoad.R"),
        Package_regex,
        new.Package_)
      pkg_find_replace(
        file.path("R", "onLoad.R"),
        sprintf('packageVersion\\("%s"\\)', old.Package),
        sprintf('packageVersion\\("%s"\\)', new.Package))
      pkg_find_replace(
        file.path("src", "init.c"),
        paste0("R_init_", Package_regex),
        paste0("R_init_", gsub("[.]", "_", new.Package_)))
      pkg_find_replace(
        "NAMESPACE",
        sprintf('useDynLib\\("?%s"?', Package_regex),
        paste0('useDynLib(', new.Package_))
    },
    N = 10^seq(3, 8),
    # Build two N-row tables whose id1/id2 columns are shuffled "id<k>" strings.
    # data.table() is namespace-qualified because nothing is attached in the
    # fresh reprex session.
    setup = {
      set.seed(108)
      n2 <- 1:N
      n1 <- sample(n2, N)
      d1 <- data.table::data.table(
        id1 = paste0("id", sample(n1)),
        id2 = paste0("id", sample(n1)),
        v1 = n1)
      d2 <- data.table::data.table(
        id1 = paste0("id", sample(n2)),
        id2 = paste0("id", sample(n2)),
        v2 = n2)
    },
    # Equivalent to d2[d1, v1 := i.v1, on=c("id1","id2")]; d2 must be the
    # first argument of `[.data.table` (the old `d2(d1, ...)` called d2 as a
    # function and failed).
    expr = data.table:::`[.data.table`(d2, d1, v1 := i.v1, on = c("id1", "id2")),
    # https://github.com/Rdatatable/data.table/commit/8b201fd28f5d4afcc4be026a5d9eb4bb6dd62955
    "Before" = "8b201fd28f5d4afcc4be026a5d9eb4bb6dd62955",
    # https://github.com/Rdatatable/data.table/commit/e0140eafa07ad39a7ac9d67737cc43c4c95ca935
    "Regression" = "e0140eafa07ad39a7ac9d67737cc43c4c95ca935")
})
```