Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

purge based on ffmpeg audio checksum via -a #177

Open
wants to merge 4 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 31 additions & 0 deletions .github/workflows/ubuntu.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
# CI build for pushes and pull requests targeting master.
# Installs the build toolchain and the ffmpeg development libraries, then
# configures and builds twice: once with ffmpeg and once without it.
name: C/C++ CI

on:
  push:
    branches: [ "master" ]
  pull_request:
    branches: [ "master" ]

jobs:
  build:

    runs-on: ubuntu-latest

    steps:
    - uses: actions/checkout@v3
    - name: install dependencies
      run: |
        sudo apt-get update
        sudo apt-get install -yq \
          build-essential clang clang-tools autotools-dev autoconf libtool \
          libavcodec-dev libavformat-dev libavfilter-dev libswscale-dev libavutil-dev
    - name: configure with all
      run: |
        autoreconf --install
        ./configure --with-ncurses --with-sqlite --with-ffmpeg
        make
    # Second pass checks that the tree still builds when ffmpeg support is disabled.
    - name: configure without ffmpeg
      run: |
        autoreconf --install
        ./configure --with-ncurses --with-sqlite --without-ffmpeg
        make
7 changes: 7 additions & 0 deletions Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,10 @@ fdupes_SOURCES = fdupes.c\
md5/md5.h
dist_man1_MANS = fdupes.1

if HAVE_FFMPEG
fdupes_SOURCES += ffmpeg.c
endif

if WITH_NCURSES
fdupes_SOURCES += filegroup.h\
fileaction.h\
Expand Down Expand Up @@ -64,5 +68,8 @@ endif

EXTRA_DIST = testdir CHANGES CONTRIBUTORS

fdupes_CFLAGS = $(FFMPEG_CFLAGS)
fdupes_LDADD = $(FFMPEG_LIBS)

dist-hook:
if [ -f $(top_srcdir)/INSTALL.enduser ]; then chmod u+w $(distdir)/INSTALL; \cp -f $(top_srcdir)/INSTALL.enduser $(distdir)/INSTALL; fi
5 changes: 5 additions & 0 deletions README
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,10 @@ Introduction
FDUPES is a program for identifying duplicate files residing
within specified directories.

This fork adds audio duplicate handling via ffmpeg to determine a file's audio
hash, disregarding metadata differences. Enabling this option (-a) will ONLY
examine audio files for duplicate purging.


Usage
--------------------------------------------------------------------
Expand All @@ -17,6 +21,7 @@ Usage: fdupes [options] DIRECTORY...
-H --hardlinks normally, when two or more files point to the same
disk area they are treated as non-duplicates; this
option will change this behavior
-a --audio-only only work on audio files
-G --minsize=SIZE consider only files greater than or equal to SIZE bytes
-L --maxsize=SIZE consider only files less than or equal to SIZE bytes
-n --noempty exclude zero-length files from consideration
Expand Down
14 changes: 14 additions & 0 deletions configure.ac
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,20 @@ AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[
[AC_DEFINE([_XOPEN_SOURCE], [700], [enable certain X/Open and POSIX features])]
)

#
# FFMPEG for audio hash
#
# ffmpeg support is on unless --without-ffmpeg (i.e. with_ffmpeg=no) is given.
# When it is on, the libraries are located through pkg-config and configure
# aborts if they are missing; HAVE_FFMPEG is then defined for the C sources
# and set as an Automake conditional.
AC_ARG_WITH([ffmpeg],
  [AS_HELP_STRING([--without-ffmpeg], [Do not use ffmpeg for audio file identification])])

AS_IF([test x"$with_ffmpeg" != x"no"],
  [PKG_CHECK_MODULES([FFMPEG], [libavformat libavcodec libavutil], [],
     [AC_MSG_ERROR([ffmpeg libs not found])])
   AC_DEFINE([HAVE_FFMPEG], [1], [Use ffmpeg for audio hash comparison])]
)
AC_SUBST([FFMPEG_CFLAGS])
AC_SUBST([FFMPEG_LIBS])
AM_CONDITIONAL([HAVE_FFMPEG], [test x"$with_ffmpeg" != x"no"])


#
# NCURSES library
#
Expand Down
110 changes: 99 additions & 11 deletions fdupes.c
Original file line number Diff line number Diff line change
Expand Up @@ -22,13 +22,15 @@
#include "config.h"
#include <stdio.h>
#include <stdarg.h>
#include <stdbool.h>
#include <string.h>
#include <strings.h>
#include <sys/stat.h>
#include <dirent.h>
#include <unistd.h>
#include <stdlib.h>
#include <time.h>
#include <assert.h>
#ifdef HAVE_GETOPT_H
#include <getopt.h>
#endif
Expand Down Expand Up @@ -320,6 +322,7 @@ int grokdir(char *dir, file_t **filelistp, struct stat *logfile_status)
newfile->crcpartial = NULL;
newfile->duplicates = NULL;
newfile->hasdupes = 0;
newfile->audioinfo = NULL;

newfile->d_name = (char*)malloc(strlen(dir)+strlen(dirinfo->d_name)+2);

Expand Down Expand Up @@ -396,6 +399,25 @@ int grokdir(char *dir, file_t **filelistp, struct stat *logfile_status)
free(newfile);
} else {
if (S_ISREG(linfo.st_mode) || (S_ISLNK(linfo.st_mode) && ISFLAG(flags, F_FOLLOWLINKS))) {
#ifdef HAVE_FFMPEG
if (ISFLAG(flags, F_AUDIOONLY))
{
char* err = NULL;
ffmpeg_t* audioinfo = ffmpeg_alloc();

int ret;
if ( (ret = ffmpeg_audioinfo(audioinfo, newfile->d_name, &err)) == FFMPEG_OK) {
newfile->audioinfo = audioinfo;
}
else {
free(err);
ffmpeg_free(audioinfo);
free(newfile->d_name);
free(newfile);
continue;
}
}
#endif
getfilestats(newfile, &info, &linfo);
*filelistp = newfile;
filecount++;
Expand Down Expand Up @@ -449,7 +471,7 @@ md5_byte_t *getcrcsignatureuntil(char *filename, off_t fsize, off_t max_read)

toread = (fsize >= CHUNK_SIZE) ? CHUNK_SIZE : fsize;
if (fread(chunk, toread, 1, file) != 1) {
errormsg("error reading from file %s\n", filename);
errormsg("error reading from file %s - (toread=%d, chunks=%d, file size=%d) %s\n", filename, toread, CHUNK_SIZE, fsize, strerror(errno));
fclose(file);
return NULL;
}
Expand Down Expand Up @@ -675,6 +697,25 @@ file_t **checkmatch(filetree_t **root, filetree_t *checktree, file_t *file)
return NULL;
}

if (ISFLAG(flags, F_AUDIOONLY))
{
assert(file->audioinfo && checktree->file->audioinfo);
{
if (file->audioinfo->hash < checktree->file->audioinfo->hash) {
cmpresult = -1;
}
else {
if (file->audioinfo->hash > checktree->file->audioinfo->hash) {
cmpresult = 1;
}
else {
cmpresult = 0;
}
}
}
}
else
{
if (file->size < checktree->file->size)
cmpresult = -1;
else
Expand Down Expand Up @@ -752,6 +793,7 @@ file_t **checkmatch(filetree_t **root, filetree_t *checktree, file_t *file)
cmpresult = md5cmp(file->crcsignature, checktree->file->crcsignature);
}
}
}

if (cmpresult < 0) {
if (checktree->left != NULL) {
Expand Down Expand Up @@ -823,13 +865,17 @@ void printmatches(file_t *files)
(files->size != 1) ? "s " : " ");
if (ISFLAG(flags, F_SHOWTIME))
printf("%s ", fmttime(files->mtime));
if (ISFLAG(flags, F_AUDIOONLY))
printf("%s %s %s %s ", files->audioinfo->meta.artist, files->audioinfo->meta.album, files->audioinfo->meta.title, files->audioinfo->meta.genre);
if (ISFLAG(flags, F_DSAMELINE)) escapefilename("\\ ", &files->d_name);
printf("%s%c", files->d_name, ISFLAG(flags, F_DSAMELINE)?' ':'\n');
}
tmpfile = files->duplicates;
while (tmpfile != NULL) {
if (ISFLAG(flags, F_SHOWTIME))
printf("%s ", fmttime(tmpfile->mtime));
if (ISFLAG(flags, F_AUDIOONLY))
printf("%s %s %s %s ", tmpfile->audioinfo->meta.artist, tmpfile->audioinfo->meta.album, tmpfile->audioinfo->meta.title, tmpfile->audioinfo->meta.genre);
if (ISFLAG(flags, F_DSAMELINE)) escapefilename("\\ ", &tmpfile->d_name);
printf("%s%c", tmpfile->d_name, ISFLAG(flags, F_DSAMELINE)?' ':'\n');
tmpfile = tmpfile->duplicates;
Expand Down Expand Up @@ -972,10 +1018,14 @@ void deletefiles(file_t *files, int prompt, FILE *tty, char *logfile)

if (prompt)
{
printf("[%d] ", counter);
if (ISFLAG(flags, F_SHOWTIME))
printf("[%d] [%s] %s\n", counter, fmttime(files->mtime), files->d_name);
else
printf("[%d] %s\n", counter, files->d_name);
printf("[%s] ", fmttime(files->mtime));

if (ISFLAG(flags, F_AUDIOONLY)) {
printf("['%s' '%s' '%s' '%s'] ", files->audioinfo->meta.artist, files->audioinfo->meta.album, files->audioinfo->meta.title, files->audioinfo->meta.genre);
}
printf("%s\n", files->d_name);
}

tmpfile = files->duplicates;
Expand All @@ -984,10 +1034,14 @@ void deletefiles(file_t *files, int prompt, FILE *tty, char *logfile)
dupelist[++counter] = tmpfile;
if (prompt)
{
if (ISFLAG(flags, F_SHOWTIME))
printf("[%d] [%s] %s\n", counter, fmttime(tmpfile->mtime), tmpfile->d_name);
else
printf("[%d] %s\n", counter, tmpfile->d_name);
printf("[%d] ", counter);
if (ISFLAG(flags, F_SHOWTIME))
printf("[%s] ", fmttime(tmpfile->mtime));

if (ISFLAG(flags, F_AUDIOONLY)) {
printf("['%s' '%s' '%s' '%s'] ", tmpfile->audioinfo->meta.artist, tmpfile->audioinfo->meta.album, tmpfile->audioinfo->meta.title, tmpfile->audioinfo->meta.genre);
}
printf("%s\n", tmpfile->d_name);
}
tmpfile = tmpfile->duplicates;
}
Expand Down Expand Up @@ -1370,6 +1424,9 @@ void help_text()
printf(" -H --hardlinks normally, when two or more files point to the same\n");
printf(" disk area they are treated as non-duplicates; this\n");
printf(" option will change this behavior\n");
#ifdef HAVE_FFMPEG
printf(" -a --audio-only only work on audio files\n");
#endif
printf(" -G --minsize=SIZE consider only files greater than or equal to SIZE bytes\n");
printf(" -L --maxsize=SIZE consider only files less than or equal to SIZE bytes\n");
#ifndef NO_SQLITE
Expand Down Expand Up @@ -1452,7 +1509,8 @@ void close_db_on_exit()
}
#endif

int main(int argc, char **argv) {
int main(int argc, char **argv)
{
int x;
int opt;
FILE *file1;
Expand All @@ -1476,6 +1534,9 @@ int main(int argc, char **argv) {
#ifdef HAVE_GETOPT_H
static struct option long_options[] =
{
#ifdef HAVE_FFMPEG
{ "audio-only", 0, 0, 'a' },
#endif
{ "omitfirst", 0, 0, 'f' },
{ "recurse", 0, 0, 'r' },
{ "recurse:", 0, 0, 'R' },
Expand Down Expand Up @@ -1516,12 +1577,17 @@ int main(int argc, char **argv) {

oldargv = cloneargs(argc, argv);

while ((opt = GETOPT(argc, argv, "frRq1StsHG:L:nAdPvhNImpo:il:Dcx:"
while ((opt = GETOPT(argc, argv, "afrRq1StsHG:L:nAdPvhNImpo:il:Dcx:"
#ifdef HAVE_GETOPT_H
, long_options, NULL
#endif
)) != EOF) {
switch (opt) {
#ifdef HAVE_FFMPEG
case 'a':
SETFLAG(flags, F_AUDIOONLY);
break;
#endif
case 'f':
SETFLAG(flags, F_OMITFIRST);
break;
Expand Down Expand Up @@ -1645,6 +1711,25 @@ int main(int argc, char **argv) {
exit(1);
}

x = optind;
bool badargs = false;
while (x < argc) {
// validate every path up front so a typo is reported before the lengthy scan starts
if (access(argv[x], F_OK) != 0) {
printf("invalid input: %s - %s\n", argv[x], strerror(errno));
badargs = true;
}
++x;
}
if (badargs) {
return -1;
}

if (ISFLAG(flags, F_AUDIOONLY)) {
UNSETFLAG(flags, F_IMMEDIATE);
printf("audio-only mode disables immediate delete\n");
}

#ifdef NO_SQLITE
if (
ISFLAG(flags, F_CACHESIGNATURES) ||
Expand Down Expand Up @@ -1842,7 +1927,7 @@ int main(int argc, char **argv) {
ordertype == ORDER_CTIME ? sort_pairs_by_ctime :
sort_pairs_by_filename, loginfo );
}
else if (ISFLAG(flags, F_DEFERCONFIRMATION) || confirmmatch(file1, file2))
else if (ISFLAG(flags, F_DEFERCONFIRMATION) || (ISFLAG(flags, F_AUDIOONLY) && strcmp(curfile->audioinfo->audiohash, (*match)->audioinfo->audiohash) == 0) || (!ISFLAG(flags, F_AUDIOONLY) && confirmmatch(file1, file2)) )
registerpair(match, curfile,
ordertype == ORDER_MTIME ? sort_pairs_by_mtime :
ordertype == ORDER_CTIME ? sort_pairs_by_ctime :
Expand Down Expand Up @@ -1932,6 +2017,9 @@ int main(int argc, char **argv) {
free(files->d_name);
free(files->crcsignature);
free(files->crcpartial);
#ifdef HAVE_FFMPEG
ffmpeg_free(files->audioinfo);
#endif
free(files);
files = curfile;
}
Expand Down
4 changes: 3 additions & 1 deletion fdupes.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,12 +25,14 @@
#include "config.h"
#include <sys/stat.h>
#include "md5/md5.h"
#include "ffmpeg.h"

typedef struct _file {
char *d_name;
off_t size;
md5_byte_t *crcpartial;
md5_byte_t *crcsignature;
ffmpeg_t* audioinfo;
dev_t device;
ino_t inode;
time_t mtime;
Expand All @@ -42,4 +44,4 @@ typedef struct _file {
struct _file *next;
} file_t;

#endif
#endif
Loading