Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add ability to limit files considered based on min/max file size #33

Open
wants to merge 10 commits into
base: master
Choose a base branch
from
7 changes: 7 additions & 0 deletions CHANGES
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,13 @@ who contributed the patch or idea appears first, followed by
those who've otherwise worked on that item. For a list of
contributors names and identifiers please see the CONTRIBUTORS file.

Changes from 1.51 to xxx
- Added -b (min) and -B (max) arguments to filter the files considered
based on the given minimum and maximum file sizes
- Added -e argument to allow for skipping the byte to byte verification
when the crc matches
- Added -x argument to avoid crossing into a device other than the one
on which each given directory argument resides

Changes from 1.50 to 1.51

Expand Down
2 changes: 1 addition & 1 deletion CONTRIBUTORS
Original file line number Diff line number Diff line change
Expand Up @@ -15,4 +15,4 @@ on their contributions. Names are listed in alphabetical order.
[LR] Lukas Ruf ([email protected])
[PB] Peter Bray (Sydney, Australia)
[SSD] Steven S. Dick ([email protected])

[JC] Jason Clara (Toronto, Canada)
2 changes: 1 addition & 1 deletion Makefile.inc/VERSION
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,4 @@
# VERSION determines the program's version number.
#

VERSION = 1.51
VERSION = 1.52-dev
6 changes: 6 additions & 0 deletions README
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,12 @@ Usage: fdupes [options] DIRECTORY...
-f --omitfirst omit the first file in each set of matches
-1 --sameline list each set of matches on a single line
-S --size show size of duplicate files
-b --minfilesize Consider only files larger than N KB
-B --maxfilesize Consider only files smaller than N KB
-e --skipverify Skip final byte to byte verification after
checksum match
-x --xdevice Do not cross into another device from the given
arguments starting device
-q --quiet hide progress indicator
-d --delete prompt user for files to preserve and delete all
others; important: under particular circumstances,
Expand Down
12 changes: 12 additions & 0 deletions fdupes.1
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,18 @@ list each set of matches on a single line
.B -S --size
show size of duplicate files
.TP
.B -b --minfilesize
Consider only files larger than N KB
.TP
.B -B --maxfilesize
Consider only files smaller than N KB
.TP
.B -e --skipverify
Skip final byte to byte verification after a checksum match
.TP
.B -x --xdevice
Do not cross into another device from the given arguments starting device
.TP
.B -m --summarize
summarize duplicate files information
.TP
Expand Down
160 changes: 138 additions & 22 deletions fdupes.c
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,10 @@
#define F_SUMMARIZEMATCHES 0x0800
#define F_EXCLUDEHIDDEN 0x1000
#define F_PERMISSIONS 0x2000
#define F_MINFILESIZE 0x4000
#define F_MAXFILESIZE 0x8000
#define F_SKIPBYTEVERIFY 0x10000
#define F_XDEVICE 0x20000

typedef enum {
ORDER_TIME = 0,
Expand All @@ -64,6 +68,10 @@ char *program_name;

unsigned long flags = 0;

unsigned long long int min_file_size = 0;
unsigned long long int max_file_size = 0;
dev_t workingdevice;

#define CHUNK_SIZE 8192

#define INPUT_SIZE 256
Expand Down Expand Up @@ -237,6 +245,30 @@ int nonoptafter(char *option, int argc, char **oldargv,
return x;
}

/*
  Decide whether an entry must be left out because of the size limits
  requested through F_MINFILESIZE / F_MAXFILESIZE.

  file - entry whose 'size' field has already been filled in
  info - stat data for the entry; only st_mode is examined

  Returns 0 when the entry is kept, 1 when it is smaller than
  min_file_size, and 2 when it is larger than max_file_size.
  Directories are never rejected by size.
*/
int skipfile(file_t *file, struct stat info)
{
  /* Size limits apply to files only; directories always pass. */
  if (S_ISDIR(info.st_mode))
    return 0;

  if (ISFLAG(flags, F_MINFILESIZE) && file->size < min_file_size)
    return 1;

  if (ISFLAG(flags, F_MAXFILESIZE) && file->size > max_file_size)
    return 2;

  return 0;
}

/*
  Fill in the size, inode, device and mtime fields of 'file' by
  querying each helper with the path stored in file->d_name.
*/
void getfilestats(file_t *file)
{
  char *path = file->d_name;

  file->size   = filesize(path);
  file->inode  = getinode(path);
  file->device = getdevice(path);
  file->mtime  = getmtime(path);
}

int grokdir(char *dir, file_t **filelistp)
{
DIR *cd;
Expand All @@ -249,7 +281,11 @@ int grokdir(char *dir, file_t **filelistp)
static int progress = 0;
static char indicator[] = "-\\|/";
char *fullname, *name;


if (ISFLAG(flags, F_XDEVICE) && getdevice(dir) != workingdevice) {
printf("Skipping out of device item: %s\n", dir);
return 0;
}
cd = opendir(dir);

if (!cd) {
Expand Down Expand Up @@ -293,6 +329,8 @@ int grokdir(char *dir, file_t **filelistp)
if (lastchar >= 0 && dir[lastchar] != '/')
strcat(newfile->d_name, "/");
strcat(newfile->d_name, dirinfo->d_name);

getfilestats(newfile);

if (ISFLAG(flags, F_EXCLUDEHIDDEN)) {
fullname = strdup(newfile->d_name);
Expand All @@ -305,7 +343,7 @@ int grokdir(char *dir, file_t **filelistp)
free(fullname);
}

if (filesize(newfile->d_name) == 0 && ISFLAG(flags, F_EXCLUDEEMPTY)) {
if (newfile->size == 0 && ISFLAG(flags, F_EXCLUDEEMPTY)) {
free(newfile->d_name);
free(newfile);
continue;
Expand All @@ -322,6 +360,13 @@ int grokdir(char *dir, file_t **filelistp)
free(newfile);
continue;
}

if(skipfile(newfile, info))
{
free(newfile->d_name);
free(newfile);
continue;
}

if (S_ISDIR(info.st_mode)) {
if (ISFLAG(flags, F_RECURSE) && (ISFLAG(flags, F_FOLLOWLINKS) || !S_ISLNK(linfo.st_mode)))
Expand Down Expand Up @@ -462,17 +507,10 @@ void purgetree(filetree_t *checktree)
free(checktree);
}

/*
  Fill in the size, inode, device and mtime fields of 'file' by
  querying each helper with the path stored in file->d_name.
*/
void getfilestats(file_t *file)
{
file->size = filesize(file->d_name);
file->inode = getinode(file->d_name);
file->device = getdevice(file->d_name);
file->mtime = getmtime(file->d_name);
}

int registerfile(filetree_t **branch, file_t *file)
{
getfilestats(file);
//getfilestats(file);

*branch = (filetree_t*) malloc(sizeof(filetree_t));
if (*branch == NULL) {
Expand Down Expand Up @@ -512,11 +550,11 @@ file_t **checkmatch(filetree_t **root, filetree_t *checktree, file_t *file)
duplicates unless the user specifies otherwise.
*/

if (!ISFLAG(flags, F_CONSIDERHARDLINKS) && (getinode(file->d_name) ==
checktree->file->inode) && (getdevice(file->d_name) ==
if (!ISFLAG(flags, F_CONSIDERHARDLINKS) && (file->inode ==
checktree->file->inode) && (file->device ==
checktree->file->device)) return NULL;

fsize = filesize(file->d_name);
fsize = file->size;

if (fsize < checktree->file->size)
cmpresult = -1;
Expand Down Expand Up @@ -610,7 +648,7 @@ file_t **checkmatch(filetree_t **root, filetree_t *checktree, file_t *file)
}
} else
{
getfilestats(file);
//getfilestats(file);
return &checktree->file;
}
}
Expand Down Expand Up @@ -995,6 +1033,10 @@ void help_text()
printf(" -f --omitfirst \tomit the first file in each set of matches\n");
printf(" -1 --sameline \tlist each set of matches on a single line\n");
printf(" -S --size \tshow size of duplicate files\n");
printf(" -b --minfilesize \tConsider only files larger than N KB\n");
printf(" -B --maxfilesize \tConsider only files smaller than N KB\n");
printf(" -e --skipverify \tSkip final byte to byte verification after checksum match\n");
printf(" -x --xdevice \tDo not cross into another device from the given arguments starting device\n");
printf(" -m --summarize \tsummarize dupe information\n");
printf(" -q --quiet \thide progress indicator\n");
printf(" -d --delete \tprompt user for files to preserve and delete all\n");
Expand All @@ -1018,6 +1060,39 @@ void help_text()
#endif
}

/*
  Convert a size argument (as given to -b / -B) into a byte count.

  input - decimal number optionally followed by exactly one unit
          suffix: k/K (KiB), m/M (MiB) or g/G (GiB).  A bare number
          is interpreted as KiB.

  Returns the size in bytes.

  Prints an error message and exits with status 1 when the argument
  contains no digits, carries a minus sign, has an unknown suffix or
  trailing characters, or when the resulting byte count does not fit
  in an unsigned long long.
*/
unsigned long long int parsesizeinput(char* input){
  const unsigned long long int limit = ~0ULL;
  unsigned long long int inputsize;
  unsigned long long int multiplier = 0;
  char *endptr = NULL;
  int valid;

  inputsize = strtoull(input, &endptr, 10);

  /* strtoull leaves endptr at the start when it found no digits;
     record that before the suffix handling advances endptr. */
  valid = (endptr != input);

  /* strtoull silently negates values written with a leading '-'.
     A '-' can never be part of a valid size, so reject it outright. */
  if (strchr(input, '-') != NULL)
    valid = 0;

  switch (*endptr) {
    case '\0':
      multiplier = 1024ULL;            /* no suffix: KiB */
      break;
    case 'k':
    case 'K':
      multiplier = 1024ULL;
      endptr++;
      break;
    case 'm':
    case 'M':
      multiplier = 1024ULL * 1024ULL;
      endptr++;
      break;
    case 'g':
    case 'G':
      multiplier = 1024ULL * 1024ULL * 1024ULL;
      endptr++;
      break;
    default:
      break;                           /* unknown suffix: multiplier stays 0 */
  }

  /* Anything left after the (optional) suffix is garbage. */
  if (multiplier == 0 || *endptr != '\0')
    valid = 0;

  /* Refuse values whose byte count would wrap around.  This also
     catches the saturated result strtoull returns on overflow. */
  if (valid && inputsize > limit / multiplier)
    valid = 0;

  if (!valid) {
    fprintf(stderr,"fdupes: provide numeric argument >0 for file size to consider\n");
    exit(1);
  }

  return inputsize * multiplier;
}

int main(int argc, char **argv) {
int x;
int opt;
Expand All @@ -1032,7 +1107,8 @@ int main(int argc, char **argv) {
char **oldargv;
int firstrecurse;
ordertype_t ordertype = ORDER_TIME;



#ifndef OMIT_GETOPT_LONG
static struct option long_options[] =
{
Expand All @@ -1057,6 +1133,10 @@ int main(int argc, char **argv) {
{ "summary", 0, 0, 'm' },
{ "permissions", 0, 0, 'p' },
{ "order", 1, 0, 'o' },
{ "minfilesize", 1, 0, 'b' },
{ "maxfilesize", 1, 0, 'B' },
{ "skipverify", 0, 0, 'e' },
{ "xdevice", 0, 0, 'x' },
{ 0, 0, 0, 0 }
};
#define GETOPT getopt_long
Expand All @@ -1068,7 +1148,7 @@ int main(int argc, char **argv) {

oldargv = cloneargs(argc, argv);

while ((opt = GETOPT(argc, argv, "frRq1SsHlndvhNmpo:"
while ((opt = GETOPT(argc, argv, "frRq1SsHlndvhNmpexo:b:B:"
#ifndef OMIT_GETOPT_LONG
, long_options, NULL
#endif
Expand Down Expand Up @@ -1118,6 +1198,34 @@ int main(int argc, char **argv) {
break;
case 'm':
SETFLAG(flags, F_SUMMARIZEMATCHES);
break;
case 'e':
SETFLAG(flags, F_SKIPBYTEVERIFY);
break;
case 'x':
SETFLAG(flags, F_XDEVICE);
break;
case 'b':
SETFLAG(flags, F_MINFILESIZE);
if (strlen(optarg) == 0) {
fprintf(stderr,"fdupes -b: provide numeric argument >0 for minimum file size to consider\n");
exit(1);
}
min_file_size = parsesizeinput(optarg);
break;
case 'B':
SETFLAG(flags, F_MAXFILESIZE);
if (strlen(optarg) == 0) {
fprintf(stderr,"fdupes -B: provide numeric argument >0 for maximum file size to consider\n");
exit(1);
}
max_file_size = parsesizeinput(optarg);

if (ISFLAG(flags, F_MAXFILESIZE) && ISFLAG(flags, F_MINFILESIZE) && min_file_size > max_file_size){
fprintf(stderr, "fdupes -B: min file size (-b) must be smaller then max file size(-B)\n");
exit(1);
}

break;
case 'p':
SETFLAG(flags, F_PERMISSIONS);
Expand Down Expand Up @@ -1167,18 +1275,26 @@ int main(int argc, char **argv) {

/* F_RECURSE is not set for directories before --recurse: */
for (x = optind; x < firstrecurse; x++)
filecount += grokdir(argv[x], &files);

{
workingdevice = getdevice(argv[x]);
filecount += grokdir(argv[x], &files);
}
/* Set F_RECURSE for directories after --recurse: */
SETFLAG(flags, F_RECURSE);

for (x = firstrecurse; x < argc; x++)
filecount += grokdir(argv[x], &files);
{
workingdevice = getdevice(argv[x]);
filecount += grokdir(argv[x], &files);
}
} else {
for (x = optind; x < argc; x++)
filecount += grokdir(argv[x], &files);
for (x = optind; x < argc; x++){
workingdevice = getdevice(argv[x]);
filecount += grokdir(argv[x], &files);
}
}


if (!files) {
if (!ISFLAG(flags, F_HIDEPROGRESS)) fprintf(stderr, "\r%40s\r", " ");
exit(0);
Expand Down Expand Up @@ -1206,7 +1322,7 @@ int main(int argc, char **argv) {
continue;
}

if (confirmmatch(file1, file2)) {
if (ISFLAG(flags, F_SKIPBYTEVERIFY) || confirmmatch(file1, file2)) {
registerpair(match, curfile,
(ordertype == ORDER_TIME) ? sort_pairs_by_mtime : sort_pairs_by_filename );

Expand Down