Skip to content

Commit

Permalink
add reflink support
Browse files Browse the repository at this point in the history
Add option --reflink to deduplicate identical files via filesystem reflinks.
Reflinking is done via the FICLONE ioctl, which allows the file
content to be shared across multiple inodes. This is similar to a hardlink, with two
substantial differences:
- each copy has its own metadata (permissions, timestamps, attributes, etc.)
- on write, the content is copied (CoW) so the changes are not reflected to all other copies

Currently implemented only on Linux; it works with Btrfs and XFS (and, to some extent, ZFS).
  • Loading branch information
teknoraver committed Dec 29, 2024
1 parent 482509f commit c28cf8b
Show file tree
Hide file tree
Showing 3 changed files with 83 additions and 3 deletions.
5 changes: 5 additions & 0 deletions fdupes.1
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,11 @@ Prompt user for files to preserve, deleting all others (see
.B CAVEATS
below).
.TP
.B -e --reflink
Linux only. Attempt to reflink duplicate files to save space.
The files must be on the same filesystem, and that filesystem must support reflinking.
Currently only Btrfs and XFS (and sometimes ZFS) support this feature.
.TP
.B -D --deferconfirmation
In interactive mode, defer byte-for-byte confirmation of
duplicates until just before file deletion.
Expand Down
80 changes: 77 additions & 3 deletions fdupes.c
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,16 @@
#include <unistd.h>
#include <stdlib.h>
#include <time.h>
#ifdef __linux__
# include <linux/fs.h>
# include <fcntl.h>
# ifdef FICLONE
# define OPT_E "e"
# include <sys/ioctl.h>
# else
# define OPT_E
# endif
#endif
#ifdef HAVE_GETOPT_H
#include <getopt.h>
#endif
Expand Down Expand Up @@ -1191,6 +1201,49 @@ void deletefiles(file_t *files, int prompt, FILE *tty, char *logfile)
free(preservestr);
}

/* Replace the contents of `duplicate` with a reflink of `existing`.

   The clone is requested with the FICLONE ioctl, issued on a write-only
   descriptor for `duplicate` with a read-only descriptor for `existing`
   as the source.

   existing  - file whose content is kept and shared
   duplicate - file whose content is replaced by the clone

   Nothing is returned; every failure (different devices, open() error,
   ioctl() error) is reported on stderr and the function simply returns.
   When FICLONE is not available at compile time this is a no-op. */
void reflinkfile(file_t *existing, file_t *duplicate)
{
#ifdef FICLONE
  int fd1, fd2;

  /* reject files on different devices before opening anything */
  if (existing->device != duplicate->device)
  {
    fprintf(stderr, "error: %s and %s are not on the same device\n", existing->d_name, duplicate->d_name);
    return;
  }

  fd1 = open(existing->d_name, O_RDONLY);
  if (fd1 == -1)
  {
    fprintf(stderr, "error opening %s: %s\n", existing->d_name, strerror(errno));
    return;
  }

  fd2 = open(duplicate->d_name, O_WRONLY);
  if (fd2 == -1)
  {
    /* report before close() so errno still belongs to the failed open() */
    fprintf(stderr, "error opening %s: %s\n", duplicate->d_name, strerror(errno));
    close(fd1);
    return;
  }

  if (ioctl(fd2, FICLONE, fd1) == -1)
    fprintf(stderr, "error reflinking %s to %s: %s\n", existing->d_name, duplicate->d_name, strerror(errno));

  close(fd1);
  close(fd2);
#else
  /* reflink support not compiled in */
  (void) existing;
  (void) duplicate;
#endif
}

/* Reflink every duplicate in each set to the first file of that set.

   files - head of the file list (linked through `next`); may be NULL.

   Members of one duplicate set are linked through `duplicates`, while
   `next` links the file list itself. Walking `next` alone and cloning
   every entry onto the very first file would overwrite files that are
   not duplicates of it, so only the `duplicates` chain of each set head
   is cloned.

   NOTE(review): assumes `hasdupes` is set only on the first file of a
   duplicate chain, and that `next` spans all scanned files rather than
   one set - confirm against the file_t definition and the caller. */
void reflinkfiles(file_t *files)
{
  file_t *head;
  file_t *dupe;

  for (head = files; head != NULL; head = head->next)
  {
    if (!head->hasdupes)
      continue;

    for (dupe = head->duplicates; dupe != NULL; dupe = dupe->duplicates)
      reflinkfile(head, dupe);
  }
}

int sort_pairs_by_arrival(file_t *f1, file_t *f2)
{
if (f2->duplicates != 0)
Expand Down Expand Up @@ -1401,6 +1454,7 @@ void help_text()
printf(" with -s or --symlinks, or when specifying a\n");
printf(" particular directory more than once; refer to the\n");
printf(" fdupes documentation for additional information\n");
printf(" -e --relink reflink files to the first file in each set\n");
printf(" -D --deferconfirmation in interactive mode, defer byte-for-byte confirmation\n");
printf(" of duplicates until just before file deletion\n");
#ifndef NO_NCURSES
Expand Down Expand Up @@ -1503,6 +1557,9 @@ int main(int argc, char **argv) {
{ "log", 1, 0, 'l' },
{ "deferconfirmation", 0, 0, 'D' },
{ "cache", 0, 0, 'c' },
#ifdef FICLONE
{ "reflink", 0, 0, 'e' },
#endif
{ 0, 0, 0, 0 }
};
#define GETOPT getopt_long
Expand All @@ -1516,7 +1573,7 @@ int main(int argc, char **argv) {

oldargv = cloneargs(argc, argv);

while ((opt = GETOPT(argc, argv, "frRq1StsHG:L:nAdPvhNImpo:il:Dcx:"
while ((opt = GETOPT(argc, argv, "frRq1StsHG:L:nAdPvhNImpo:il:Dcx:" OPT_E
#ifdef HAVE_GETOPT_H
, long_options, NULL
#endif
Expand Down Expand Up @@ -1574,6 +1631,9 @@ int main(int argc, char **argv) {
case 'd':
SETFLAG(flags, F_DELETEFILES);
break;
case 'e':
SETFLAG(flags, F_REFLINK);
break;
case 'P':
SETFLAG(flags, F_PLAINPROMPT);
break;
Expand Down Expand Up @@ -1675,8 +1735,8 @@ int main(int argc, char **argv) {
exit(1);
}

if (ISFLAG(flags, F_SUMMARIZEMATCHES) && ISFLAG(flags, F_DELETEFILES)) {
errormsg("options --summarize and --delete are not compatible\n");
if (ISFLAG(flags, F_SUMMARIZEMATCHES) && (ISFLAG(flags, F_DELETEFILES) || ISFLAG(flags, F_REFLINK))) {
errormsg("options --summarize is not compatible with --delete or --reflink\n");
exit(1);
}

Expand All @@ -1686,6 +1746,12 @@ int main(int argc, char **argv) {
exit(1);
}

if (ISFLAG(flags, F_DELETEFILES) && ISFLAG(flags, F_REFLINK))
{
errormsg("options --delete and --reflink are not compatible\n");
exit(1);
}

if (!ISFLAG(flags, F_DELETEFILES)) {
logfile = 0;
loginfo = 0;
Expand Down Expand Up @@ -1842,6 +1908,8 @@ int main(int argc, char **argv) {
ordertype == ORDER_CTIME ? sort_pairs_by_ctime :
sort_pairs_by_filename, loginfo );
}
else if (ISFLAG(flags, F_REFLINK) && ISFLAG(flags, F_IMMEDIATE))
reflinkfile(*match, curfile);
else if (ISFLAG(flags, F_DEFERCONFIRMATION) || confirmmatch(file1, file2))
registerpair(match, curfile,
ordertype == ORDER_MTIME ? sort_pairs_by_mtime :
Expand Down Expand Up @@ -1918,6 +1986,12 @@ int main(int argc, char **argv) {
}
}

else if (ISFLAG(flags, F_REFLINK))
{
if (ISFLAG(flags, F_NOPROMPT) || ISFLAG(flags, F_IMMEDIATE))
reflinkfiles(files);
}

else

if (ISFLAG(flags, F_SUMMARIZEMATCHES))
Expand Down
1 change: 1 addition & 0 deletions flags.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
#define F_PRUNECACHE 0x200000
#define F_READONLYCACHE 0x400000
#define F_VACUUMCACHE 0x800000
#define F_REFLINK 0x1000000

extern unsigned long flags;

Expand Down

0 comments on commit c28cf8b

Please sign in to comment.