Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: improve resource dialect conversion accuracy #25

Draft
wants to merge 6 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
61 changes: 37 additions & 24 deletions scripts/update.js
Original file line number Diff line number Diff line change
Expand Up @@ -33,39 +33,52 @@ async function downloadResource(resourceName) {
function extractRedirects(data) {
console.log('Extracting resources...');

const resources = JSON.parse(data);
const mappings = resources.redirects.map((redirect) => [redirect.name, redirect.aliases ?? []]);
/**
* @type {Array<{ dialects: { adg?: string; ubo: string; }; hints: string[]; }>}
*/
const mappings = [];

// Integrate adguard mappings
for (const dialect of adguardDialects) {
// Skip adguard exclusives
if (dialect.aliases === undefined) {
continue;
/**
* @type {{ redirects: Array<{ name: string; aliases: string[]; body: string; contentType: string; }> }}
*/
const { redirects } = JSON.parse(data);
for (const redirect of redirects) {
/**
* @type {Set<string>}
*/
const hints = new Set();
hints.add(redirect.name);
for (const alias of redirect.aliases) {
hints.add(alias);
}

// Find an entry with adguard dialect
const entry = mappings.find(([, aliases]) => {
if (aliases.includes(dialect.title)) {
return true;
}

for (const alias of dialect.aliases) {
if (aliases.includes(alias)) {
return true;
// Register AdGuard dialects
/**
* @type {{ title: string; aliases: string[]; isBlocking: boolean; contentType: string; content: string; }}
*/
const adguardDialect = adguardDialects.find((dialect) =>
[dialect.title, ...(dialect.aliases ?? [])].includes(redirect.name),
);
if (adguardDialect !== undefined) {
hints.add(adguardDialect.title);
if (adguardDialect.aliases !== undefined) {
for (const alias of adguardDialect.aliases) {
hints.add(alias);
}
}
}

return false;
});
if (entry === undefined) {
if (hints.size === 1) {
continue;
}

for (const alias of [dialect.title, ...dialect.aliases]) {
if (entry[1].includes(alias) === false) {
entry[1].push(alias);
}
}
mappings.push({
dialects: {
adg: adguardDialect?.title,
ubo: redirect.name,
},
hints: Array.from(hints),
});
}

return JSON.stringify(mappings, null, 2);
Expand Down
4 changes: 2 additions & 2 deletions src/converters/adguard.js
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,11 @@ const createFilter = (rules, filterId = 0) => {

export default async function convert(rules, { resourcesPath } = {}) {
const filter = createFilter(rules.map(normalizeFilter));
const conversionResult = await converter.convertStaticRuleSet(filter, { resourcesPath });
const conversionResult = await converter.convertStaticRuleSet(filter, { resourcesPath: '/a' });
Copy link
Member

@philipp-classen philipp-classen Jan 28, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is the use of "/a" intended here, or should it be using the "resourcesPath" from the arguments?

Update: I assume it is intended. Perhaps we could add a hint about where it originates (some AdGuard convention?), though I'm not sure where a comment would fit best, since "/a" is also used in other places. Would extracting a constant for it help?

const declarativeRules = await conversionResult.ruleSet.getDeclarativeRules();

return {
rules: declarativeRules.map((rule) => normalizeRule(rule)),
rules: declarativeRules.map((rule) => normalizeRule(rule, { resourcesPath })),
errors: conversionResult.errors,
limitations: conversionResult.limitations,
};
Expand Down
97 changes: 49 additions & 48 deletions src/converters/helpers.js
Original file line number Diff line number Diff line change
@@ -1,34 +1,52 @@
import mappings from '../mappings.json';

/**
 * Returns the part of `path` that follows its last `/`.
 * A path without any `/` is returned unchanged; a path ending in `/` yields ''.
 * @param {string} path
 * @returns {string}
 */
function getPathBasename(path) {
  const slashAt = path.lastIndexOf('/');
  return slashAt === -1 ? path : path.slice(slashAt + 1);
}

/**
 * Builds an alias lookup table from the imported `mappings` list.
 * Each `[name, aliases]` pair contributes one entry per alias, keyed by the
 * alias and valued by `name`; when an alias occurs more than once, the last
 * occurrence wins.
 * @returns {Map<string, string>}
 */
export function generateResourcesMapping() {
  const aliasPairs = mappings.flatMap(([name, aliases]) =>
    aliases.map((alias) => [alias, name]),
  );
  return new Map(aliasPairs);
}

// Short filter option names paired with their long-form equivalents.
// Used as the default `mapping` option of normalizeFilter().
export const DEFAULT_PARAM_MAPPING = {
'3p': 'third-party',
xhr: 'xmlhttprequest',
frame: 'subdocument',
};
// Alias -> name lookup, built once at module load by generateResourcesMapping().
export const DEFAULT_RESOURCES_MAPPING = generateResourcesMapping();

export function normalizeFilter(
filter,
{ mapping = DEFAULT_PARAM_MAPPING, resourcesMapping = DEFAULT_RESOURCES_MAPPING } = {},
) {
/**
 * Normalizes a redirect resource name into the preferred name of a dialect.
 *
 * Lookup order:
 *  1. an exact hint match for the full name, then for the name without its
 *     last extension (e.g. `noopjs.js` -> `noopjs`);
 *  2. if nothing matched exactly, the first mapping with a hint that merely
 *     contains one of those candidates.
 * Exact matches are searched across all mappings first so that a loose
 * substring hit on an earlier entry cannot shadow the right entry later on.
 *
 * @param {string} name Redirect resource name as written in the filter/rule.
 * @param {'ubo' | 'adg'} dialect Dialect whose preferred name is wanted.
 * @param {Array<{ dialects: { adg?: string; ubo: string; }; hints: string[]; }>} [table]
 *   Mapping table to search; defaults to the bundled `mappings`.
 * @returns {string} The dialect's name for the resource, the 'ubo' name when
 *   the mapping has none for `dialect`, or `name` itself when nothing matches.
 * @throws {Error} If `dialect` is neither 'ubo' nor 'adg'.
 */
function normalizeRedirect(name, dialect, table = mappings) {
  if (dialect !== 'ubo' && dialect !== 'adg') {
    throw new Error(`The redirect resource dialect of "${dialect}" is not supported!`);
  }

  /**
   * @type {string[]}
   */
  const rawCandidates = [name];
  if (name.includes('.')) {
    rawCandidates.push(name.slice(0, name.lastIndexOf('.')));
  }
  // An empty candidate (from '' or '.ext') is a substring of every hint and
  // would always select the first mapping, so it must never be searched for.
  const candidates = rawCandidates.filter((candidate) => candidate.length > 0);

  let match;
  for (const candidate of candidates) {
    match = table.find((entry) => entry.hints.includes(candidate));
    if (match !== undefined) {
      break;
    }
  }

  if (match === undefined) {
    // Fall back to the loose, substring-based lookup.
    match = table.find((entry) =>
      candidates.some((candidate) => entry.hints.some((hint) => hint.includes(candidate))),
    );
  }

  if (match === undefined) {
    return name;
  }

  return match.dialects[dialect] ?? match.dialects.ubo;
}

export function normalizeFilter(filter, { mapping = DEFAULT_PARAM_MAPPING } = {}) {
let [front, ...back] = filter.split('$');
let params = back.join(',').split(',');

Expand All @@ -49,24 +67,16 @@ export function normalizeFilter(
front = front.toLowerCase();
}

// adguard converter doesn't work with $redirect with slash value
// replace possible $redirect params including a slash
const indexOfRedirect = params.findIndex((p) => p.startsWith('redirect=') && p.includes('/'));
const indexOfRedirect = params.findIndex((p) => p.startsWith('redirect='));
if (indexOfRedirect !== -1) {
const name = resourcesMapping.get(params[indexOfRedirect].slice(9));
if (name !== undefined) {
params[indexOfRedirect] = 'redirect=' + name;
}
params[indexOfRedirect] =
'redirect=' + normalizeRedirect(params[indexOfRedirect].slice(9), 'adg');
}

const indexOfRedirectRule = params.findIndex(
(p) => p.startsWith('redirect-rule=') && p.includes('/'),
);
const indexOfRedirectRule = params.findIndex((p) => p.startsWith('redirect-rule='));
if (indexOfRedirectRule !== -1) {
const name = resourcesMapping.get(params[indexOfRedirectRule].slice(14));
if (name !== undefined) {
params[indexOfRedirectRule] = 'redirect-rule=' + name;
}
// Write the normalized value back into the `redirect-rule=` slot. Indexing
// with `indexOfRedirect` here would clobber the `redirect=` param, or touch
// `params[-1]` when the filter has no `redirect=` at all.
params[indexOfRedirectRule] =
  'redirect-rule=' + normalizeRedirect(params[indexOfRedirectRule].slice(14), 'adg');
}

if (back.length === 0) {
Expand All @@ -76,7 +86,7 @@ export function normalizeFilter(
return `${front}$${params.join(',')}`;
}

export function normalizeRule(rule, { resourcesMapping = DEFAULT_RESOURCES_MAPPING } = {}) {
export function normalizeRule(rule, { resourcesPath } = {}) {
if (!rule) {
return;
}
Expand Down Expand Up @@ -111,16 +121,7 @@ export function normalizeRule(rule, { resourcesMapping = DEFAULT_RESOURCES_MAPPI
}

if (newRule.action && newRule.action.type === 'redirect') {
const filename = getPathBasename(newRule.action.redirect.extensionPath);
const preferredFilename =
resourcesMapping.get(filename) ??
// try searching without an extension
// adguard converter attaches an file extension at the end
resourcesMapping.get(filename.slice(0, filename.lastIndexOf('.')));
if (preferredFilename !== undefined) {
newRule.action.redirect.extensionPath =
newRule.action.redirect.extensionPath.slice(0, -filename.length) + preferredFilename;
}
newRule.action.redirect.extensionPath = `${resourcesPath}/${normalizeRedirect(newRule.action.redirect.extensionPath.slice(3 /* '/a/'.length */), 'ubo')}`;
}

return newRule;
Expand Down
Loading