Skip to content

Commit

Permalink
Merge pull request #21 from DrKain/issue-4
Browse files Browse the repository at this point in the history
1.5.0
  • Loading branch information
DrKain authored Aug 27, 2022
2 parents 589525a + c41cd3a commit 212fdf8
Show file tree
Hide file tree
Showing 9 changed files with 128 additions and 34 deletions.
2 changes: 1 addition & 1 deletion lib/help.d.ts
Original file line number Diff line number Diff line change
@@ -1 +1 @@
export declare const help_text = "\nUsage: subclean [options]\nExample: subclean subtitle.srt -w\nBulk: subclean --sweep \"path/to/media\"\n\nOptions:\n -i, --input The file you want to clean\n -o, --output Where to write the cleaned file (defaults to input)\n -w, --overwrite Overwrite the output file if it already exists \n -c, --clean Delete the input file before writing the output \n -v, --version Display current version\n -n, --no-check Don't check for a new package version\n -s, --silent Silent mode. Nothing logged to console\n --update Download the latest filters from GitHub\n This will not update subclean, only the filters!\n\n --sweep Bulk subtitle cleaning. Searches for subtitles\n in multiple directories (and sub-directories)\n This will enable --overwrite!\n\n v- You likely do not need to use these -v\n\n --depth How many sub-directories to look when sweep cleaning\n --debug Display extra debugging information\n --help Show the text you're reading now\n --ne No Empty (nodes). Deletes empty nodes after cleaning.\n --testing Testing mode. Will not modify files.\n --uf Use Filter: internal or appdata\n";
export declare const help_text = "\nUsage: subclean [options]\nExample: subclean subtitle.srt -w\nBulk: subclean --sweep \"path/to/media\"\n\nOptions:\n -i, --input The file you want to clean\n -o, --output Where to write the cleaned file (defaults to input)\n -w, --overwrite Overwrite the output file if it already exists \n -c, --clean Delete the input file before writing the output \n -v, --version Display current version\n -n, --no-check Don't check for a new package version\n -s, --silent Silent mode. Nothing logged to console\n --update Download the latest filters from GitHub\n This will not update subclean, only the filters!\n\n --sweep Bulk subtitle cleaning. Searches for subtitles\n in multiple directories (and sub-directories)\n This will enable --overwrite!\n\n v- You likely do not need to use these -v\n\n --nochains Attempt to match and remove chained nodes.\n --depth How many sub-directories to look when sweep cleaning\n --debug Display extra debugging information\n --help Show the text you're reading now\n --ne No Empty (nodes). Deletes empty nodes after cleaning.\n --testing Testing mode. Will not modify files.\n --uf Use Filter: internal or appdata\n";
2 changes: 1 addition & 1 deletion lib/help.js

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

60 changes: 47 additions & 13 deletions lib/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,7 @@ var SubClean = /** @class */ (function () {
sweep: argv.sweep || '',
depth: (_a = argv.depth) !== null && _a !== void 0 ? _a : 10,
ne: argv['ne'] || false,
nochains: argv.nochains || false,
testing: argv.testing || false,
uf: argv.uf || 'default'
};
Expand Down Expand Up @@ -366,25 +367,58 @@ var SubClean = /** @class */ (function () {
_this.blacklist.forEach(function (mark) {
var regex = null;
_this.actions_count++;
if (mark.startsWith('/') && mark.endsWith('/')) {
// remove first and last characters
regex = new RegExp(mark.substring(1, mark.length - 1), 'i');
if (regex.exec(node.data.text)) {
var text = node.data.text;
/**
 * Clean chained nodes based on current match
 * https://github.com/DrKain/subclean/pull/20
 *
 * Walks backwards from the node just before the current one and blanks
 * every earlier node whose text is contained in the current node's text.
 * Empty nodes are skipped; the walk stops at the first non-empty node that
 * is not contained. Nothing is touched unless item.nochains is truthy.
 * Each blanked node adds one to hits_1.
 *
 * NOTE(review): `item` is defined outside this excerpt - confirm it really
 * carries the nochains flag (the parsed arguments object does).
 *
 * Returns { nodes, range }: the removed texts (nearest node first) and a
 * 1-based "first-last" range ending at the current node.
 */
var handle_chain = function () {
    var removed = [];
    var range = index + 1 + "-" + (index + 1);
    if (index > 0 && item.nochains) {
        var prev = nodes_1[index - 1];
        if (text.includes(prev.data.text)) {
            // Walk back to index 0 inclusive. The previous `i > 0` bound never
            // looked at the first node, so a chain starting there was left behind.
            for (var i = index - 1; i >= 0; i--) {
                var check = nodes_1[i].data.text;
                if (check.length === 0)
                    continue; // Ignore empty string nodes
                if (!text.includes(check))
                    break; // Chain stopped
                hits_1++;
                removed.push(check);
                // Use the node's real position so skipped empty nodes
                // do not shift the start of the reported range.
                range = i + 1 + "-" + (index + 1);
                nodes_1[i].data.text = '';
            }
        }
    }
    return { nodes: removed, range: range };
};
// Clean the current node
var clean = function () {
if (_this.args.debug)
_this.log('[Line] ' + text);
// Requires --nochains param
var chain = handle_chain();
if (chain.nodes.length > 1) {
_this.log("[Match] Chain found at " + chain.range + " (" + mark + ")");
hits_1 += chain.nodes.length;
}
else {
_this.log("[Match] Advertising found in node " + (index + 1) + " (" + mark + ")");
if (_this.args.debug)
_this.log('[Line] ' + node.data.text);
hits_1++;
node.data.text = '';
}
};
if (mark.startsWith('/') && mark.endsWith('/')) {
// remove first and last characters
regex = new RegExp(mark.substring(1, mark.length - 1), 'i');
if (regex.exec(text))
clean();
}
else {
if (node.data.text.toLowerCase().includes(mark)) {
_this.log("[Match] Advertising found in node " + (index + 1) + " (" + mark + ")");
if (_this.args.debug)
_this.log('[Line] ' + node.data.text);
hits_1++;
node.data.text = '';
}
// Plain text matches
if (node.data.text.toLowerCase().includes(mark))
clean();
}
});
});
Expand Down
9 changes: 9 additions & 0 deletions lib/interface.d.ts
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,10 @@ export interface IArguments {
* Do not log anything to the console
*/
silent: boolean;
/**
* Attempt to remove chained ads
*/
nochains: boolean;
/**
* Expects directory. This will clean multiple files across multiple directories and subdirectories.
* Use the depth parameter to limit how many directories deep subclean will look.
Expand Down Expand Up @@ -70,3 +74,8 @@ export interface IArguments {
*/
uf: 'default' | 'appdata' | 'internal';
}
/**
 * Minimal shape of a parsed subtitle node as the cleaner uses it.
 * Only `data.text` is described here; any other fields produced by the
 * subtitle parser are not modelled by this interface.
 */
export interface INode {
data: {
/** Text of the node; set to an empty string when the node is cleaned. */
text: string;
};
}
4 changes: 2 additions & 2 deletions package.json
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
{
"name": "subclean",
"version": "1.4.1",
"version": "1.5.0",
"description": "A CLI package to clean subtitle files of advertising",
"main": "lib/index.js",
"scripts": {
"ver": "nvm use 14.18.0",
"test": "ts-node src/index.ts subtitle.srt --ne -w --uf=internal",
"test": "ts-node src/index.ts subtitle.srt --ne -w --uf=internal --nochains",
"build": "tsc && pkg . && npm run tidy",
"tidy": "ts-node postbuild.ts"
},
Expand Down
3 changes: 2 additions & 1 deletion readme.md
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ If using Bazarr, please see the [wiki page](https://github.com/DrKain/subclean/w

```
Usage: subclean [options]
Single: subclean subtitle.srt -w
Example: subclean subtitle.srt -w
Bulk: subclean --sweep "path/to/media"
Options:
Expand All @@ -46,6 +46,7 @@ Options:
v- You likely do not need to use these -v
--nochains Attempt to match and remove chained nodes. Experimental.
--depth How many sub-directories to look when sweep cleaning
--debug Display extra debugging information
--help Show the text you're reading now
Expand Down
1 change: 1 addition & 0 deletions src/help.ts
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ Options:
v- You likely do not need to use these -v
--nochains Attempt to match and remove chained nodes.
--depth How many sub-directories to look when sweep cleaning
--debug Display extra debugging information
--help Show the text you're reading now
Expand Down
71 changes: 55 additions & 16 deletions src/index.ts
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
#! /usr/bin/env node
import { statSync, existsSync, readdirSync, readFileSync, writeFileSync, mkdirSync, unlinkSync } from 'fs';
import { dirname, join, resolve, extname, basename } from 'path';
import { parseSync, stringifySync, Format } from 'subtitle';
import { parseSync, stringifySync, Format, NodeList } from 'subtitle';
import { IArguments, INode } from './interface';
import { help_text } from './help';
import { get } from 'https';
import { IArguments } from './interface';

const argv = require('minimist')(process.argv.slice(2));
const updateNotifier = require('update-notifier');
Expand Down Expand Up @@ -41,6 +41,7 @@ class SubClean {
depth: argv.depth ?? 10,
ne: argv['ne'] || false,

nochains: argv.nochains || false,
testing: argv.testing || false,
uf: argv.uf || 'default'
} as IArguments;
Expand Down Expand Up @@ -302,34 +303,72 @@ class SubClean {
// Remove all cases of \r (parser can not handle these)
fileData = fileData.replace(/\r/g, ' ');

const nodes = parseSync(fileData);
const nodes: INode[] = parseSync(fileData) as INode[];
let hits = 0;

// For debugging
this.nodes_count += nodes.length;

// Remove ads
nodes.forEach((node: any, index) => {
nodes.forEach((node: INode, index: number) => {
this.blacklist.forEach((mark: any) => {
let regex = null;
this.actions_count++;
const text = node.data.text;

/**
 * Clean chained nodes based on current match
 * https://github.com/DrKain/subclean/pull/20
 *
 * Walks backwards from the node just before the current one and blanks
 * every earlier node whose text is contained in the current node's text.
 * Empty nodes are skipped; the walk stops at the first non-empty node that
 * is not contained. Nothing is touched unless `item.nochains` is truthy.
 * Each blanked node adds one to `hits`.
 *
 * NOTE(review): `item` is defined outside this excerpt - confirm it really
 * carries the nochains flag (the parsed arguments object does).
 *
 * @returns the removed texts (nearest node first) and a 1-based
 * "first-last" range ending at the current node.
 */
const handle_chain = (): { nodes: string[]; range: string } => {
    const removed: string[] = [];
    let range = `${index + 1}-${index + 1}`;

    if (index > 0 && item.nochains) {
        const prev = nodes[index - 1];

        if (text.includes(prev.data.text)) {
            // Walk back to index 0 inclusive. The previous `i > 0` bound never
            // looked at the first node, so a chain starting there was left behind.
            for (let i = index - 1; i >= 0; i--) {
                const check = nodes[i].data.text;
                if (check.length === 0) continue; // Ignore empty string nodes
                if (!text.includes(check)) break; // Chain stopped

                hits++;
                removed.push(check);

                // Use the node's real position so skipped empty nodes
                // do not shift the start of the reported range.
                range = `${i + 1}-${index + 1}`;
                nodes[i].data.text = '';
            }
        }
    }

    return { nodes: removed, range };
};

// Clean the current node.
// Logs the match, blanks the matched node and counts it as one hit.
// Earlier chained nodes are blanked and counted by handle_chain itself
// (one hit per node), so they are not added again here.
const clean = (): void => {
    if (this.args.debug) this.log('[Line] ' + text);

    // Requires --nochains param
    const chain = handle_chain();

    if (chain.nodes.length > 0) {
        // Any earlier node removed means the match spans a range of nodes
        this.log(`[Match] Chain found at ${chain.range} (${mark})`);
    } else {
        this.log(`[Match] Advertising found in node ${index + 1} (${mark})`);
    }

    // Always clear the matched node; previously it survived when a chain was found
    hits++;
    node.data.text = '';
};

if (mark.startsWith('/') && mark.endsWith('/')) {
// remove first and last characters
regex = new RegExp(mark.substring(1, mark.length - 1), 'i');
if (regex.exec(text)) clean();
} else {
if (node.data.text.toLowerCase().includes(mark)) {
this.log(`[Match] Advertising found in node ${index + 1} (${mark})`);
if (this.args.debug) this.log('[Line] ' + node.data.text);
hits++;
node.data.text = '';
}
// Plain text matches
if (node.data.text.toLowerCase().includes(mark)) clean();
}
});
});
Expand All @@ -355,7 +394,7 @@ class SubClean {
}

// Stringify cleaned subtitles
const cleaned = stringifySync(nodes, { format: item.ext as Format });
const cleaned = stringifySync(nodes as NodeList, { format: item.ext as Format });

// Write cleaned file
if (this.args.testing === false) {
Expand Down
10 changes: 10 additions & 0 deletions src/interface.ts
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,10 @@ export interface IArguments {
* Do not log anything to the console
*/
silent: boolean;
/**
* Attempt to remove chained ads
*/
nochains: boolean;
/**
* Expects directory. This will clean multiple files across multiple directories and subdirectories.
* Use the depth parameter to limit how many directories deep subclean will look.
Expand Down Expand Up @@ -76,3 +80,9 @@ export interface IArguments {
*/
uf: 'default' | 'appdata' | 'internal';
}

/**
 * Minimal shape of a parsed subtitle node as the cleaner uses it.
 * Only `data.text` is described here; any other fields produced by the
 * subtitle parser are not modelled by this interface.
 */
export interface INode {
data: {
/** Text of the node; set to an empty string when the node is cleaned. */
text: string;
};
}

0 comments on commit 212fdf8

Please sign in to comment.