diff --git a/.gitignore b/.gitignore index 5cc4810..8cb5a5b 100644 --- a/.gitignore +++ b/.gitignore @@ -11,3 +11,4 @@ scrypt-asm.js *.msi *.exe /libscrypt/crypto_scrypt-nosse-nommap.c +publicsuffixlist/public_suffix_list.dat diff --git a/ext/webextension/package-lock.json b/ext/webextension/package-lock.json index 93877a9..363325e 100644 --- a/ext/webextension/package-lock.json +++ b/ext/webextension/package-lock.json @@ -12,6 +12,7 @@ "jest": "^27.3.1", "jest-puppeteer": "^6.0.0", "jest-webextension-mock": "^3.7.19", + "pngjs3": "^6.0.1", "puppeteer": "^11.0.0" } }, @@ -1515,6 +1516,15 @@ "integrity": "sha512-9o5UecI3GhkpM6DrXr69PblIuWxPKk9Y0jHBRhdocZ2y7YECBFCsHm79Pr3OyR2AvjhDkabFJaDJMYRazHgsow==", "dev": true }, + "node_modules/browserify-zlib": { + "version": "0.2.0", + "resolved": "https://registry.npmjs.org/browserify-zlib/-/browserify-zlib-0.2.0.tgz", + "integrity": "sha512-Z942RysHXmJrhqk88FmKBVq/v5tqmSkDz7p54G/MGyjMnCFFnC79XWNbg+Vta8W6Wb2qtSZTSxIGkJrRpCFEiA==", + "dev": true, + "dependencies": { + "pako": "~1.0.5" + } + }, "node_modules/browserslist": { "version": "4.18.1", "resolved": "https://registry.npmjs.org/browserslist/-/browserslist-4.18.1.tgz", @@ -2848,6 +2858,16 @@ "node": ">= 4" } }, + "node_modules/immer": { + "version": "9.0.7", + "resolved": "https://registry.npmjs.org/immer/-/immer-9.0.7.tgz", + "integrity": "sha512-KGllzpbamZDvOIxnmJ0jI840g7Oikx58lBPWV0hUh7dtAyZpFqqrBZdKka5GlTwMTZ1Tjc/bKKW4VSFAt6BqMA==", + "dev": true, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/immer" + } + }, "node_modules/import-fresh": { "version": "3.3.0", "resolved": "https://registry.npmjs.org/import-fresh/-/import-fresh-3.3.0.tgz", @@ -4311,6 +4331,12 @@ "node": ">=6" } }, + "node_modules/pako": { + "version": "1.0.11", + "resolved": "https://registry.npmjs.org/pako/-/pako-1.0.11.tgz", + "integrity": "sha512-4hLB8Py4zZce5s4yd9XzopqwVv/yGNhV1Bl8NTmCq1763HeK2+EwVTv+leGeL13Dnh2wfbqowVPXCIO0z4taYw==", + "dev": true + }, 
"node_modules/parent-module": { "version": "1.0.1", "resolved": "https://registry.npmjs.org/parent-module/-/parent-module-1.0.1.tgz", @@ -4419,6 +4445,19 @@ "node": ">=8" } }, + "node_modules/pngjs3": { + "version": "6.0.1", + "resolved": "https://registry.npmjs.org/pngjs3/-/pngjs3-6.0.1.tgz", + "integrity": "sha512-3dqkpjsmDW+CHujoP2RWLT7z7ZkAYfU5w8B7QjfW/sAXmNBsEeA5AY5z+O/ceUOIIfH6FpccyS7r3y+PXFo8GQ==", + "dev": true, + "dependencies": { + "browserify-zlib": "^0.2.0", + "immer": "^9.0.6" + }, + "engines": { + "node": ">=10.0.0" + } + }, "node_modules/prelude-ls": { "version": "1.2.1", "resolved": "https://registry.npmjs.org/prelude-ls/-/prelude-ls-1.2.1.tgz", @@ -6661,6 +6700,15 @@ "integrity": "sha512-9o5UecI3GhkpM6DrXr69PblIuWxPKk9Y0jHBRhdocZ2y7YECBFCsHm79Pr3OyR2AvjhDkabFJaDJMYRazHgsow==", "dev": true }, + "browserify-zlib": { + "version": "0.2.0", + "resolved": "https://registry.npmjs.org/browserify-zlib/-/browserify-zlib-0.2.0.tgz", + "integrity": "sha512-Z942RysHXmJrhqk88FmKBVq/v5tqmSkDz7p54G/MGyjMnCFFnC79XWNbg+Vta8W6Wb2qtSZTSxIGkJrRpCFEiA==", + "dev": true, + "requires": { + "pako": "~1.0.5" + } + }, "browserslist": { "version": "4.18.1", "resolved": "https://registry.npmjs.org/browserslist/-/browserslist-4.18.1.tgz", @@ -7648,6 +7696,12 @@ "integrity": "sha512-cyFDKrqc/YdcWFniJhzI42+AzS+gNwmUzOSFcRCQYwySuBBBy/KjuxWLZ/FHEH6Moq1NizMOBWyTcv8O4OZIMg==", "dev": true }, + "immer": { + "version": "9.0.7", + "resolved": "https://registry.npmjs.org/immer/-/immer-9.0.7.tgz", + "integrity": "sha512-KGllzpbamZDvOIxnmJ0jI840g7Oikx58lBPWV0hUh7dtAyZpFqqrBZdKka5GlTwMTZ1Tjc/bKKW4VSFAt6BqMA==", + "dev": true + }, "import-fresh": { "version": "3.3.0", "resolved": "https://registry.npmjs.org/import-fresh/-/import-fresh-3.3.0.tgz", @@ -8797,6 +8851,12 @@ "integrity": "sha512-R4nPAVTAU0B9D35/Gk3uJf/7XYbQcyohSKdvAxIRSNghFl4e71hVoGnBNQz9cWaXxO2I10KTC+3jMdvvoKw6dQ==", "dev": true }, + "pako": { + "version": "1.0.11", + "resolved": 
"https://registry.npmjs.org/pako/-/pako-1.0.11.tgz", + "integrity": "sha512-4hLB8Py4zZce5s4yd9XzopqwVv/yGNhV1Bl8NTmCq1763HeK2+EwVTv+leGeL13Dnh2wfbqowVPXCIO0z4taYw==", + "dev": true + }, "parent-module": { "version": "1.0.1", "resolved": "https://registry.npmjs.org/parent-module/-/parent-module-1.0.1.tgz", @@ -8878,6 +8938,16 @@ "find-up": "^4.0.0" } }, + "pngjs3": { + "version": "6.0.1", + "resolved": "https://registry.npmjs.org/pngjs3/-/pngjs3-6.0.1.tgz", + "integrity": "sha512-3dqkpjsmDW+CHujoP2RWLT7z7ZkAYfU5w8B7QjfW/sAXmNBsEeA5AY5z+O/ceUOIIfH6FpccyS7r3y+PXFo8GQ==", + "dev": true, + "requires": { + "browserify-zlib": "^0.2.0", + "immer": "^9.0.6" + } + }, "prelude-ls": { "version": "1.2.1", "resolved": "https://registry.npmjs.org/prelude-ls/-/prelude-ls-1.2.1.tgz", diff --git a/ext/webextension/package.json b/ext/webextension/package.json index 31a7d08..c0baa6c 100644 --- a/ext/webextension/package.json +++ b/ext/webextension/package.json @@ -7,12 +7,15 @@ "unittest": "node --experimental-vm-modules node_modules/jest/bin/jest.js --silent=false src/*.test.js" }, "type": "module", - "jest": { "transform": {} }, + "jest": { + "transform": {} + }, "devDependencies": { + "eslint": "^8.2.0", "jest": "^27.3.1", "jest-puppeteer": "^6.0.0", "jest-webextension-mock": "^3.7.19", - "puppeteer": "^11.0.0", - "eslint": "^8.2.0" + "pngjs3": "^6.0.1", + "puppeteer": "^11.0.0" } } diff --git a/ext/webextension/src/lib/psllookup.js b/ext/webextension/src/lib/psllookup.js new file mode 100644 index 0000000..5156d09 --- /dev/null +++ b/ext/webextension/src/lib/psllookup.js @@ -0,0 +1,67 @@ +function loadImage(url) { + let img = new Image(); + return new Promise(res=>{ + img.onload = ()=>{ + res(img); + } + img.src = url; + }); +} + +async function getPixels(url) { + let img = await loadImage(url); + let canvas = document.createElement('canvas'); + canvas.height = img.height; + canvas.width = img.width; + let context = canvas.getContext('2d'); + context.drawImage(img, 0, 0); + return 
/**
 * Decode the JSON text that is packed into the pixel bytes of the lookup
 * table image.
 *
 * Only every fourth byte (index 0, 4, 8, ...) is kept. For RGBA data, such as
 * the array returned by canvas `getImageData`, that is the first channel of
 * each pixel. The kept bytes are decoded as UTF-8 text.
 *
 * @param {Uint8Array|Uint8ClampedArray} pixeldata - Interleaved pixel bytes.
 * @returns {Promise<string>} The decoded text (JSON, possibly space padded).
 */
function pixeldata_to_json(pixeldata) {
  const textBytes = pixeldata.filter((_, index) => index % 4 === 0);
  const blob = new Blob([textBytes], {type: 'text/plain; charset=utf-8'});
  return blob.text();
}

/**
 * Own-property test that ignores the prototype chain, so host labels such as
 * "constructor" are never mistaken for table entries.
 *
 * @param {object} node - A level of the lookup table.
 * @param {string} key - Label to look for.
 * @returns {boolean} True when `key` is an own property of `node`.
 */
function hasOwnKey(node, key) {
  return Object.prototype.hasOwnProperty.call(node, key);
}

/**
 * Public-suffix lookup backed by a nested table keyed by domain label,
 * starting at the TLD. A leaf is stored as `0`; a `*` key matches any label.
 */
export class PslLookup {
  /**
   * Start loading the lookup table. Loading is asynchronous: call
   * `waitTableReady()` before the first `getPublicDomain()`.
   *
   * @param {object} [args]
   * @param {function(string): (Promise<Uint8Array>|Uint8Array)} [args.tableLoader]
   *   Produces the pixel bytes for a table URL. Defaults to `getPixels`.
   * @param {string} [args.tableurl] - Location of the table image.
   */
  constructor(args) {
    // Destructuring defaults are evaluated lazily, so the browser-only
    // default loader is only touched when the caller does not supply one.
    const {
      tableLoader = getPixels,
      tableurl = "./psllookup.json.png",
    } = args || {};

    // Promise.resolve() lets a synchronous loader work as well.
    this.psltable = Promise.resolve(tableLoader(tableurl))
      .then(pixeldata_to_json)
      .then(JSON.parse)
      // Best effort: a failed load is logged and leaves the table undefined.
      .catch(e=>{console.log("something is failing",e)});
  }

  /**
   * Wait for the table to finish loading and store the parsed object on the
   * instance (undefined when loading failed).
   *
   * @returns {Promise<void>}
   */
  async waitTableReady() {
    this.psltable = await this.psltable;
  }

  /**
   * Return the registrable domain of a host name: its public suffix plus one
   * more label (or the whole host when it has no further label).
   *
   * @param {string} url - Dot separated host name, e.g. "shop.amazon.co.uk".
   * @returns {string} E.g. "amazon.co.uk".
   */
  getPublicDomain(url) {
    const labels = url.split('.').reverse();
    let node = this.psltable;
    let suffixLength = 0;

    // Walk from the TLD inwards for as long as the table has a matching rule.
    while (suffixLength < labels.length && node !== null && typeof node === 'object') {
      const label = labels[suffixLength];
      if (hasOwnKey(node, label)) {
        node = node[label];
      } else if (hasOwnKey(node, '*')) {
        // A wildcard consumes exactly one label and ends the suffix.
        node = null;
      } else {
        break;
      }
      suffixLength += 1;
    }

    // Public Suffix List default rule: when nothing matches, the rightmost
    // label alone is the public suffix.
    if (suffixLength === 0) {
      suffixLength = 1;
    }

    // slice() clamps, so a host that is itself a suffix is returned whole.
    return labels.slice(0, suffixLength + 1).reverse().join('.');
  }
}
data) { + resolve(data.data); + }); + }); +} + +class MockBlob { + constructor(data/*, params*/) { + let txt = data.toString("utf8"); + this.text = ()=>{return Promise.resolve(txt)}; + } +} +global.Blob = MockBlob; + +it('gets the correct domain from url', async () => { + + + const psl = new PslLookup({tableLoader: pngPixels}); + await psl.waitTableReady() + const getDomain = psl.getPublicDomain.bind(psl); + + expect(getDomain('example.com')).toBe('example.com'); + expect(getDomain('amazon.com')).toBe('amazon.com'); + expect(getDomain('show.amazon.com')).toBe('amazon.com'); + expect(getDomain('amazon.co.uk')).toBe('amazon.co.uk'); + expect(getDomain('shop.amazon.co.uk')).toBe('amazon.co.uk'); + expect(getDomain('tyridal.no')).toBe('tyridal.no'); + expect(getDomain('digi.gitapp.si')).toBe('digi.gitapp.si'); + expect(getDomain('www.tyridal.no')).toBe('tyridal.no'); + expect(getDomain('torbjorn.tyridal.no')).toBe('tyridal.no'); + expect(getDomain('wilson.no.eu.org')).toBe('wilson.no.eu.org'); + expect(getDomain('xxx.wilson.no.eu.org')).toBe('wilson.no.eu.org'); + expect(getDomain('weare.org.om')).toBe('weare.org.om'); + expect(getDomain('rave.weare.org.om')).toBe('weare.org.om'); + expect(getDomain('rave.blogspot.co.nz')).toBe('rave.blogspot.co.nz'); + expect(getDomain('rave.blogspot.com')).toBe('rave.blogspot.com'); + expect(getDomain('xx.rave.blogspot.co.nz')).toBe('rave.blogspot.co.nz'); + expect(getDomain('xx.rave.blogspot.com')).toBe('rave.blogspot.com'); + expect(getDomain('blogspot.com')).toBe('blogspot.com'); + +}); diff --git a/publicsuffixlist/Makefile b/publicsuffixlist/Makefile new file mode 100644 index 0000000..c93a4cb --- /dev/null +++ b/publicsuffixlist/Makefile @@ -0,0 +1,7 @@ +../ext/webextension/src/lib/psllookup.json.png: public_suffix_list.dat + python psl_to_pgm.py public_suffix_list.dat | convert pgm:- -strip -define png:compression-filter=1 ../ext/webextension/src/lib/psllookup.json.png + +public_suffix_list.dat: + wget 
https://publicsuffix.org/list/public_suffix_list.dat + + diff --git a/publicsuffixlist/psl_to_pgm.py b/publicsuffixlist/psl_to_pgm.py new file mode 100644 index 0000000..4b07db0 --- /dev/null +++ b/publicsuffixlist/psl_to_pgm.py @@ -0,0 +1,103 @@ +from __future__ import print_function +from collections import * +import sys +import json + +tree = lambda: defaultdict(tree) + +def is_ascii(s): return all(ord(c) < 128 for c in s) + +def build_tree_from_psl(pslfilename): + domain_tree = tree() + + for l in open(pslfilename): + l = l.strip() + if not l or l[0] == '/' or '.' not in l: continue + x = l.split('.') + + if l[0]=='!': continue ## deal with those later + + x = x[::-1] + d = domain_tree + for q in x: + if not is_ascii(q): + q = "xn-"+q.encode('punycode').decode('ascii') + d = d[q] + + return domain_tree + + +# convert defaultdict to dict and replace empty dicts (leafs) +# with single 0 value +def walk(d, dst): + for k,v in d.items(): + if v: + dst[k] = dict() + walk(v, dst[k]) + else: + dst[k] = 0 + + +## convert bytearray s to P5 PGM image +def pgmdump(s): + rows = int(len(s) / 4096) + 1 + cols = int(len(s)/rows) + 1 + padding = rows*cols - len(s) + + print("P5") + print(cols) + print(rows) + print(255) + print(s, end='') + print(" "*padding) + + +table=dict() +walk(build_tree_from_psl(sys.argv[1]), table) +pgmdump(json.dumps(table).replace(' ','')) + +if len(sys.argv) < 3 or sys.argv[2] != "test": + sys.exit(0) + +def lookup(url, d): + urlparts = url.split('.')[::-1] + + lut = table + res = [] + + it = iter(urlparts) + + for part in it: + res.append(part) + if not lut: + break + elif part in lut: + lut = lut[part] + elif '*' in lut: + lut = 0 + else: + break + + return ".".join(res[::-1]) + +for test in [ + 'example.com', + 'amazon.com', + 'show.amazon.com', + 'amazon.co.uk', + 'shop.amazon.co.uk', + 'tyridal.no', + 'digi.gitapp.si', + 'www.tyridal.no', + 'torbjorn.tyridal.no', + 'wilson.no.eu.org', + 'xxx.wilson.no.eu.org', + 'weare.org.om', + 
'rave.weare.org.om', + 'rave.blogspot.co.nz', + 'rave.blogspot.com', + 'xx.rave.blogspot.co.nz', + 'xx.rave.blogspot.com', + 'blogspot.com', + ]: + print(test, "->", lookup(test, table))