Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feat: Optionally extract raw HTML instead of parse5 serialization #42

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions src/html/extractors/common.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,13 @@ export interface IAttributeMapping {

/**
 * Options accepted by the HTML extractor factories and checked by `validateOptions`.
 */
export interface IHtmlExtractorOptions {
// Optional attribute mapping; `validateOptions` checks its `textPlural`, `context`
// and `comment` entries as optional strings.
attributes?: IAttributeMapping;
// When truthy, `elementContentExtractor` takes the element content from the original
// source text (`HtmlUtils.getElementContentSource`) instead of the parse5
// serialization (`HtmlUtils.getElementContent`).
rawHtml?: boolean;
}

/**
 * Checks the shape of the HTML extractor options.
 *
 * The three `options.attributes.*` entries are validated as optional strings
 * and `options.rawHtml` as an optional boolean. The checks themselves are
 * delegated to `Validate`.
 *
 * @param options The extractor options to check.
 */
export function validateOptions(options: IHtmlExtractorOptions): void {
    const optionalStringPaths = [
        'options.attributes.textPlural',
        'options.attributes.context',
        'options.attributes.comment'
    ];

    // Same order as before so the first failing property is reported first.
    for (const path of optionalStringPaths) {
        Validate.optional.stringProperty(options, path);
    }

    Validate.optional.booleanProperty(options, 'options.rawHtml');
}

6 changes: 3 additions & 3 deletions src/html/extractors/factories/element.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,13 @@ import { Element, Node } from '../../parser';
import { HtmlUtils } from '../../utils';
import { IHtmlExtractorOptions } from '../common';

export type ITextExtractor = (element: Element) => string | null;
export type ITextExtractor = (element: Element, source: string) => string | null;

export function elementExtractor(selector: string | IElementSelector[], textExtractor: ITextExtractor, options: IHtmlExtractorOptions = {}): IHtmlExtractorFunction {

let selectors = new ElementSelectorSet(selector);

return (node: Node, fileName: string, addMessage: IAddMessageCallback) => {
return (node: Node, source: string, fileName: string, addMessage: IAddMessageCallback) => {
if (typeof (<Element>node).tagName !== 'string') {
return;
}
Expand All @@ -38,7 +38,7 @@ export function elementExtractor(selector: string | IElementSelector[], textExtr
}
}

let text = textExtractor(element);
let text = textExtractor(element, source);

if (typeof text === 'string') {
addMessage({text, context, textPlural, comments});
Expand Down
4 changes: 3 additions & 1 deletion src/html/extractors/factories/elementAttribute.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,14 @@ import { HtmlUtils } from '../../utils';
import { elementExtractor } from './element';
import { Validate } from '../../../utils/validate';
import { IHtmlExtractorOptions, validateOptions } from '../common';
import { Element } from '../../parser';


export function elementAttributeExtractor(selector: string, textAttribute: string, options: IHtmlExtractorOptions = {}): IHtmlExtractorFunction {
Validate.required.nonEmptyString({selector, textAttribute});
validateOptions(options);

return elementExtractor(selector, element => {
return elementExtractor(selector, (element: Element, source: string) => {
return HtmlUtils.getAttributeValue(element, textAttribute);
}, options);
}
11 changes: 8 additions & 3 deletions src/html/extractors/factories/elementContent.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@ import { Validate } from '../../../utils/validate';
import { IContentOptions, IContentExtractorOptions, validateContentOptions } from '../../../utils/content';
import { IHtmlExtractorOptions, validateOptions } from '../common';
import { elementExtractor } from './element';
import { Element } from '../../parser';


export interface IElementContentExtractorOptions extends IHtmlExtractorOptions, IContentExtractorOptions {}

Expand All @@ -15,7 +17,7 @@ export function elementContentExtractor(selector: string, options: IElementConte
let contentOptions: IContentOptions = {
trimWhiteSpace: true,
preserveIndentation: false,
replaceNewLines: false
replaceNewLines: false,
};

if (options.content) {
Expand All @@ -30,7 +32,10 @@ export function elementContentExtractor(selector: string, options: IElementConte
}
}

return elementExtractor(selector, element => {
return HtmlUtils.getElementContent(element, contentOptions);
return elementExtractor(selector, (element: Element, source: string) => {
if (options.rawHtml)
return HtmlUtils.getElementContentSource(element, source, contentOptions);
else
return HtmlUtils.getElementContent(element, contentOptions);
}, options);
}
6 changes: 3 additions & 3 deletions src/html/extractors/factories/embeddedJs.ts
Original file line number Diff line number Diff line change
Expand Up @@ -10,20 +10,20 @@ export function embeddedJsExtractor(selector: string, jsParser: JsParser): IHtml

let selectors = new ElementSelectorSet(selector);

return (node: Node, fileName: string) => {
return (node: Node, source: string, fileName: string) => {
if (typeof (<Element>node).tagName !== 'string') {
return;
}

let element = <Element>node;

if (selectors.anyMatch(element)) {
let source = HtmlUtils.getElementContent(element, {
let content = HtmlUtils.getElementContent(element, {
trimWhiteSpace: false,
preserveIndentation: true,
replaceNewLines: false
});
jsParser.parseString(source, fileName, {
jsParser.parseString(content, fileName, {
lineNumberStart: element.sourceCodeLocation && element.sourceCodeLocation.startLine
});
}
Expand Down
10 changes: 5 additions & 5 deletions src/html/parser.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,16 +7,16 @@ export type Node = parse5.DefaultTreeNode;
export type TextNode = parse5.DefaultTreeTextNode;
export type Element = parse5.DefaultTreeElement;

export type IHtmlExtractorFunction = (node: Node, fileName: string, addMessage: IAddMessageCallback) => void;
export type IHtmlExtractorFunction = (node: Node, source: string, fileName: string, addMessage: IAddMessageCallback) => void;

export class HtmlParser extends Parser<IHtmlExtractorFunction, IParseOptions> {

protected parse(source: string, fileName: string, options: IParseOptions = {}): IMessage[] {
let document = parse5.parse(source, {sourceCodeLocationInfo: true});
return this.parseNode(document, fileName, options.lineNumberStart || 1);
return this.parseNode(document, source, fileName, options.lineNumberStart || 1);
}

protected parseNode(node: any, fileName: string, lineNumberStart: number): IMessage[] {
protected parseNode(node: any, source: string, fileName: string, lineNumberStart: number): IMessage[] {
let messages: IMessage[] = [];
let addMessageCallback = Parser.createAddMessageCallback(messages, fileName, () => {
if (node.sourceCodeLocation && node.sourceCodeLocation.startLine) {
Expand All @@ -25,13 +25,13 @@ export class HtmlParser extends Parser<IHtmlExtractorFunction, IParseOptions> {
});

for (let extractor of this.extractors) {
extractor(node, fileName, addMessageCallback);
extractor(node, source, fileName, addMessageCallback);
}

let childNodes = node.content ? node.content.childNodes : node.childNodes;
if (childNodes) {
for (let n of childNodes) {
messages = messages.concat(this.parseNode(n, fileName, lineNumberStart));
messages = messages.concat(this.parseNode(n, source, fileName, lineNumberStart));
}
}

Expand Down
11 changes: 10 additions & 1 deletion src/html/utils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ export abstract class HtmlUtils {

public static getElementContent(element: Element, options: IContentOptions): string {
let content = parse5.serialize(element, {});

// Un-escape characters that get escaped by parse5
content = content
.replace(/&amp;/g, '&')
Expand All @@ -25,4 +25,13 @@ export abstract class HtmlUtils {

return normalizeContent(content, options);
}

/**
 * Returns the element's content as it is written in `source`, i.e. the text
 * spanning from the start of its first child node to the end of its last
 * child node, passed through `normalizeContent`.
 *
 * Unlike `getElementContent`, nothing is re-serialized by parse5, so the
 * markup and entities are taken verbatim from the source text.
 *
 * @param element The element whose content should be extracted.
 * @param source The text that `element` was parsed from; the children's
 *        source offsets are used as indices into it.
 * @param options Normalization options forwarded to `normalizeContent`.
 * @returns The normalized source slice, or normalized empty content when
 *          the element has no child nodes.
 * @throws Error if the first or last child carries no `sourceCodeLocation`
 *         (e.g. the document was parsed without `sourceCodeLocationInfo`).
 */
public static getElementContentSource(element: Element, source: string, options: IContentOptions): string {
    const children = element.childNodes;

    // An element without children (e.g. `<div></div>`) has no first/last node
    // to read offsets from; its content is simply empty.
    // NOTE(review): parseNode reads a node's children from `node.content` when it
    // is set; such an element would also take this branch - confirm whether its
    // content should be sliced from the source as well.
    if (children.length === 0) {
        return normalizeContent('', options);
    }

    // Child nodes may be text or comment nodes; the cast only exposes the
    // `sourceCodeLocation` property.
    const first = (children[0] as Element).sourceCodeLocation;
    const last = (children[children.length - 1] as Element).sourceCodeLocation;
    if (!(first && last)) {
        throw new Error('source location info required');
    }

    const content = source.slice(first.startOffset, last.endOffset);
    return normalizeContent(content, options);
}
}
2 changes: 1 addition & 1 deletion src/js/extractors/factories/callExpression.ts
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ export function callExpressionExtractor(calleeName: string | string[], options:
}
}

return (node: ts.Node, sourceFile: ts.SourceFile, addMessage: IAddMessageCallback) => {
return (node: ts.Node, source: string, sourceFile: ts.SourceFile, addMessage: IAddMessageCallback) => {
if (node.kind === ts.SyntaxKind.CallExpression) {
let callExpression = <ts.CallExpression>node;

Expand Down
10 changes: 5 additions & 5 deletions src/js/parser.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ import * as ts from 'typescript';
import { Parser, IAddMessageCallback, IParseOptions } from '../parser';
import { IMessage } from '../builder';

export type IJsExtractorFunction = (node: ts.Node, sourceFile: ts.SourceFile, addMessage: IAddMessageCallback) => void;
export type IJsExtractorFunction = (node: ts.Node, source: string, sourceFile: ts.SourceFile, addMessage: IAddMessageCallback) => void;

export interface IJsParseOptions extends IParseOptions {
scriptKind?: ts.ScriptKind;
Expand All @@ -13,22 +13,22 @@ export class JsParser extends Parser<IJsExtractorFunction, IJsParseOptions> {

protected parse(source: string, fileName: string, options: IJsParseOptions = {}): IMessage[] {
let sourceFile = ts.createSourceFile(fileName, source, ts.ScriptTarget.Latest, true, options.scriptKind);
return this.parseNode(sourceFile, sourceFile, options.lineNumberStart || 1);
return this.parseNode(sourceFile, source, sourceFile, options.lineNumberStart || 1);
}

protected parseNode(node: ts.Node, sourceFile: ts.SourceFile, lineNumberStart: number): IMessage[] {
protected parseNode(node: ts.Node, source: string, sourceFile: ts.SourceFile, lineNumberStart: number): IMessage[] {
let messages: IMessage[] = [];
let addMessageCallback = Parser.createAddMessageCallback(messages, sourceFile.fileName, () => {
let location = sourceFile.getLineAndCharacterOfPosition(node.getStart());
return lineNumberStart + location.line;
});

for (let extractor of this.extractors) {
extractor(node, sourceFile, addMessageCallback);
extractor(node, source, sourceFile, addMessageCallback);
}

ts.forEachChild(node, n => {
messages = messages.concat(this.parseNode(n, sourceFile, lineNumberStart));
messages = messages.concat(this.parseNode(n, source, sourceFile, lineNumberStart));
});

return messages;
Expand Down
4 changes: 2 additions & 2 deletions tests/html/parser.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ describe('HtmlParser', () => {
builderMock = <any>{
addMessage: jest.fn()
};
parser = new HtmlParser(builderMock, [(node: Node, fileName: string, addMessage) => {
parser = new HtmlParser(builderMock, [(node: Node, source: string, fileName: string, addMessage) => {
if (node.nodeName === '#text') {
addMessage({
text: (node as TextNode).value
Expand Down Expand Up @@ -149,7 +149,7 @@ describe('HtmlParser', () => {
builderMock = <any>{
addMessage: jest.fn()
};
parser = new HtmlParser(builderMock, [(node: Node, fileName: string, addMessage) => {
parser = new HtmlParser(builderMock, [(node: Node, source: string, fileName: string, addMessage) => {
if (node.nodeName === '#text') {
addMessage({
text: (node as TextNode).value
Expand Down
31 changes: 29 additions & 2 deletions tests/html/utils.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,8 @@ import { Element } from '../../src/html/parser';
describe('HTML: Utils', () => {

function createElement(source: string): Element {
return <Element>(<any>parse5.parse(source)).childNodes[0].childNodes[1].childNodes[0];
const parsed = parse5.parse(source, { sourceCodeLocationInfo: true });
return <Element>(<any>parsed).childNodes[0].childNodes[1].childNodes[0];
}

describe('getAttributeValue', () => {
Expand Down Expand Up @@ -45,6 +46,14 @@ describe('HTML: Utils', () => {
});
}

// Test helper: parses `source`, picks the element under test via createElement
// and returns its content as sliced from the original source text.
function getContentSource(source: string): string {
    const element = createElement(source);

    return HtmlUtils.getElementContentSource(element, source, {
        replaceNewLines: false,
        trimWhiteSpace: true,
        preserveIndentation: true
    });
}

test('single line', () => {
expect(getContent('<div>Foo Bar</div>')).toBe('Foo Bar');
});
Expand All @@ -65,11 +74,21 @@ describe('HTML: Utils', () => {
);
});

describe('un-escaping', () => {
// Ampersand handling: getContent goes through the parse5 serialization and the
// un-escaping step, getContentSource returns the slice of the original source.
describe('un-escaping ampersand', () => {

test('&', () => {
// A bare ampersand is returned unchanged by both helpers.
expect(getContent('<div>Foo & Bar</div>')).toBe('Foo & Bar');
expect(getContentSource('<div>Foo & Bar</div>')).toBe('Foo & Bar');
});

test('&amp;', () => {
// getContent currently decodes the entity to '&'; this behaviour might change,
// see https://github.com/lukasgeiter/gettext-extractor/issues/36
expect(getContent('<div>Foo &amp; Bar</div>')).toBe('Foo & Bar');
// getContentSource keeps the entity exactly as written in the source.
expect(getContentSource('<div>Foo &amp; Bar</div>')).toBe('Foo &amp; Bar');
});
});

describe('un-escaping less than / greater than', () => {

test('<', () => {
expect(getContent('<div>Foo < Bar</div>')).toBe('Foo < Bar');
Expand All @@ -79,5 +98,13 @@ describe('HTML: Utils', () => {
expect(getContent('<div>Foo > Bar</div>')).toBe('Foo > Bar');
});
});

// Named entities other than &amp;, &lt; and &gt;.
describe('un-escaping other html entities', () => {

test('&hellip;', () => {
// getContent yields the decoded character, while getContentSource
// returns the entity text as it appears in the source.
expect(getContent('<div>Foo &hellip; Bar</div>')).toBe('Foo … Bar');
expect(getContentSource('<div>Foo &hellip; Bar</div>')).toBe('Foo &hellip; Bar');
});
});
});
});
2 changes: 1 addition & 1 deletion tests/js/parser.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ describe('JsParser', () => {
builderMock = <any>{
addMessage: jest.fn()
};
parser = new JsParser(builderMock, [(node: ts.Node, sourceFile: ts.SourceFile, addMessage) => {
parser = new JsParser(builderMock, [(node: ts.Node, source: string, sourceFile: ts.SourceFile, addMessage) => {
if (node.kind === ts.SyntaxKind.StringLiteral) {
addMessage({
text: (<ts.StringLiteral>node).text
Expand Down
6 changes: 3 additions & 3 deletions tests/parser.common.ts
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ export function registerCommonParserTests(parserClass: any): void {
});

test('addMessage call', () => {
let extractor = jest.fn().mockImplementationOnce((node: any, file: any, addMessage: IAddMessageCallback) => {
let extractor = jest.fn().mockImplementationOnce((node: any, source: string, file: any, addMessage: IAddMessageCallback) => {
addMessage({
text: 'Foo'
});
Expand Down Expand Up @@ -117,7 +117,7 @@ export function registerCommonParserTests(parserClass: any): void {
});

test('some files with messages', () => {
let extractor = jest.fn().mockImplementationOnce((node: any, file: any, addMessage: IAddMessageCallback) => {
let extractor = jest.fn().mockImplementationOnce((node: any, source: string, file: any, addMessage: IAddMessageCallback) => {
addMessage({
text: 'Foo'
});
Expand All @@ -134,7 +134,7 @@ export function registerCommonParserTests(parserClass: any): void {
});

test('all files with messages', () => {
let extractor = jest.fn().mockImplementation((node: any, file: any, addMessage: IAddMessageCallback) => {
let extractor = jest.fn().mockImplementation((node: any, source: string, file: any, addMessage: IAddMessageCallback) => {
addMessage({
text: 'Foo'
});
Expand Down