From c28cf8b122073746102c2df92df6a82d52795069 Mon Sep 17 00:00:00 2001 From: Matteo Croce Date: Sat, 28 Dec 2024 19:07:10 +0100 Subject: [PATCH] add reflink support Add option --reflink to deduplicate identical files via filesystem reflinks. Reflinking is done via the FICLONE ioctl, and allows the file content to be shared across multiple inodes. This is similar to a hard link, with two substantial differences: - each copy has its own metadata (permissions, timestamps, attributes, etc.) - on write, the content is copied (CoW) so the changes are not reflected in the other copies Currently implemented only on Linux; it works with Btrfs and XFS (and ZFS to some extent) --- fdupes.1 | 5 ++++ fdupes.c | 80 +++++++++++++++++++++++++++++++++++++++++++++++++++++--- flags.h | 1 + 3 files changed, 83 insertions(+), 3 deletions(-) diff --git a/fdupes.1 b/fdupes.1 index 2cd9903..425a05b 100644 --- a/fdupes.1 +++ b/fdupes.1 @@ -95,6 +95,11 @@ Prompt user for files to preserve, deleting all others (see .B CAVEATS below). .TP +.B -e --reflink +Linux only: attempt to reflink files to save space. +Files must be on the same filesystem and the filesystem must support reflinking. +Currently only Btrfs and XFS (and sometimes ZFS) support this feature. +.TP .B -D --deferconfirmation In interactive mode, defer byte-for-byte confirmation of duplicates until just before file deletion. 
diff --git a/fdupes.c b/fdupes.c index 76a7136..e0054f7 100644 --- a/fdupes.c +++ b/fdupes.c @@ -29,6 +29,16 @@ #include #include #include +#ifdef __linux__ +# include +# include +#endif +#ifdef FICLONE +# define OPT_E "e" +# include +#else /* no FICLONE: OPT_E must still be defined (empty) because the getopt string below always references it */ +# define OPT_E +#endif #ifdef HAVE_GETOPT_H #include #endif @@ -1191,6 +1201,49 @@ void deletefiles(file_t *files, int prompt, FILE *tty, char *logfile) free(preservestr); } +/* Replace the data of "duplicate" with a reflink clone of "existing": issues the FICLONE ioctl on the duplicate's descriptor with the existing file's descriptor as argument. Refuses pairs whose device fields differ. Every failure is reported on stderr and the pair is skipped. The body is compiled out when FICLONE is not defined. */ +void reflinkfile(file_t *existing, file_t *duplicate) +{ +#ifdef FICLONE + int fd1, fd2; + + if (existing->device != duplicate->device) + { + fprintf(stderr, "error: %s and %s are not on the same device\n", existing->d_name, duplicate->d_name); + return; + } + + fd1 = open(existing->d_name, O_RDONLY); + if (fd1 == -1) + { + fprintf(stderr, "error opening %s: %s\n", existing->d_name, strerror(errno)); + return; + } + + fd2 = open(duplicate->d_name, O_WRONLY); + if (fd2 == -1) + { + fprintf(stderr, "error opening %s: %s\n", duplicate->d_name, strerror(errno)); + close(fd1); + return; + } + + if (ioctl(fd2, FICLONE, fd1) == -1) + fprintf(stderr, "error reflinking %s to %s: %s\n", existing->d_name, duplicate->d_name, strerror(errno)); + close(fd1); + close(fd2); +#endif +} + +/* Reflink the members of each duplicate set to the first file of that set. Walks the file list through next and each set through duplicates; assumes hasdupes is non-zero only on the first file of a set (TODO confirm against the file_t definition). A NULL list is a no-op. */ +void reflinkfiles(file_t *files) +{ + file_t *dup; + for (; files; files = files->next) + for (dup = files->hasdupes ? files->duplicates : NULL; dup; dup = dup->duplicates) + reflinkfile(files, dup); +} + int sort_pairs_by_arrival(file_t *f1, file_t *f2) { if (f2->duplicates != 0) @@ -1401,6 +1454,7 @@ void help_text() printf(" with -s or --symlinks, or when specifying a\n"); printf(" particular directory more than once; refer to the\n"); printf(" fdupes documentation for additional information\n"); + printf(" -e --reflink reflink files to the first file in each set\n"); printf(" -D --deferconfirmation in interactive mode, defer byte-for-byte confirmation\n"); printf(" of duplicates until just before file deletion\n"); #ifndef NO_NCURSES @@ -1503,6 +1557,9 @@ int main(int argc, char **argv) { { "log", 1, 0, 'l' }, { "deferconfirmation", 0, 0, 'D' }, { "cache", 0, 0, 'c' 
}, +#ifdef FICLONE + { "reflink", 0, 0, 'e' }, +#endif { 0, 0, 0, 0 } }; #define GETOPT getopt_long @@ -1516,7 +1573,7 @@ int main(int argc, char **argv) { oldargv = cloneargs(argc, argv); - while ((opt = GETOPT(argc, argv, "frRq1StsHG:L:nAdPvhNImpo:il:Dcx:" + while ((opt = GETOPT(argc, argv, "frRq1StsHG:L:nAdPvhNImpo:il:Dcx:" OPT_E #ifdef HAVE_GETOPT_H , long_options, NULL #endif @@ -1574,6 +1631,9 @@ int main(int argc, char **argv) { case 'd': SETFLAG(flags, F_DELETEFILES); break; + case 'e': + SETFLAG(flags, F_REFLINK); + break; case 'P': SETFLAG(flags, F_PLAINPROMPT); break; @@ -1675,8 +1735,8 @@ int main(int argc, char **argv) { exit(1); } - if (ISFLAG(flags, F_SUMMARIZEMATCHES) && ISFLAG(flags, F_DELETEFILES)) { - errormsg("options --summarize and --delete are not compatible\n"); + if (ISFLAG(flags, F_SUMMARIZEMATCHES) && (ISFLAG(flags, F_DELETEFILES) || ISFLAG(flags, F_REFLINK))) { + errormsg("option --summarize is not compatible with --delete or --reflink\n"); exit(1); } @@ -1686,6 +1746,12 @@ int main(int argc, char **argv) { exit(1); } + if (ISFLAG(flags, F_DELETEFILES) && ISFLAG(flags, F_REFLINK)) + { + errormsg("options --delete and --reflink are not compatible\n"); + exit(1); + } + if (!ISFLAG(flags, F_DELETEFILES)) { logfile = 0; loginfo = 0; @@ -1842,6 +1908,8 @@ int main(int argc, char **argv) { ordertype == ORDER_CTIME ? sort_pairs_by_ctime : sort_pairs_by_filename, loginfo ); } + else if (ISFLAG(flags, F_REFLINK) && ISFLAG(flags, F_IMMEDIATE)) + { if (confirmmatch(file1, file2)) reflinkfile(*match, curfile); } /* cloning replaces the duplicate's data, so require the byte-for-byte check first; braces keep the following else attached to this chain */ else if (ISFLAG(flags, F_DEFERCONFIRMATION) || confirmmatch(file1, file2)) registerpair(match, curfile, ordertype == ORDER_MTIME ? 
sort_pairs_by_mtime : @@ -1918,6 +1986,12 @@ int main(int argc, char **argv) { } } + else if (ISFLAG(flags, F_REFLINK)) + { + if (ISFLAG(flags, F_NOPROMPT) || ISFLAG(flags, F_IMMEDIATE)) + reflinkfiles(files); + } + else if (ISFLAG(flags, F_SUMMARIZEMATCHES)) diff --git a/flags.h b/flags.h index 0033f8e..1d30c1f 100644 --- a/flags.h +++ b/flags.h @@ -28,6 +28,7 @@ #define F_PRUNECACHE 0x200000 #define F_READONLYCACHE 0x400000 #define F_VACUUMCACHE 0x800000 +#define F_REFLINK 0x1000000 extern unsigned long flags;