Skip to content

Commit 2bdc589

Browse files
authored
fix(rosetta): properly escape C# comments as XML (#1184)
The C# compiler will completely drop comment blocks that aren't well-formed XML; the <img> tag we were outputting wasn't, and so namespace headings weren't being generated. At the same time, pay more attention to escaping of arbitrary HTML and the text inside attributes to minimize the chances of things going wrong in the future.
1 parent 9538c48 commit 2bdc589

File tree

7 files changed

+182
-24
lines changed

7 files changed

+182
-24
lines changed
Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
1+
export interface Escaper {
2+
/**
3+
* Escape for use in XML/HTML text
4+
*/
5+
text(x: string | null): string;
6+
7+
/**
8+
* Escape for use in XML/HTML attributes
9+
*/
10+
attribute(x: string | null): string;
11+
12+
/**
13+
* Re-escape a string that has been escaped for text to be escaped for attributes
14+
*
15+
* Conceptually this unescapes text back to raw and re-escapes for attributes,
16+
* but for speed in practice we just do the additional escapes.
17+
*/
18+
text2attr(x: string | null): string;
19+
}
20+
21+
/**
22+
* Make a generic XML escaper
23+
*/
24+
export function makeXmlEscaper(): Escaper {
25+
const attr: Escapes = [...TEXT, ...ATTR_ADDL];
26+
27+
return {
28+
text: (x) => escapeText(TEXT, x),
29+
attribute: (x) => escapeText(attr, x),
30+
text2attr: (x) => escapeText(ATTR_ADDL, x)
31+
};
32+
}
33+
34+
/**
35+
* Make a Java specific escaper
36+
*
37+
* This one also escapes '@' because that triggers parsing of comment directives
38+
* in Java.
39+
*/
40+
export function makeJavaEscaper(): Escaper {
41+
const javaText: Escapes = [...TEXT, [new RegExp('@', 'g'), '&#64;']];
42+
const javaAttr: Escapes = [...javaText, ...ATTR_ADDL];
43+
44+
return {
45+
text: (x) => escapeText(javaText, x),
46+
attribute: (x) => escapeText(javaAttr, x),
47+
text2attr: (x) => escapeText(ATTR_ADDL, x)
48+
};
49+
}
50+
51+
type Escapes = Array<[RegExp, string]>;
52+
53+
const TEXT: Escapes = [
54+
[new RegExp('&', 'g'), '&amp;'],
55+
[new RegExp('<', 'g'), '&lt;'],
56+
[new RegExp('>', 'g'), '&gt;'],
57+
];
58+
59+
// Additional escapes (in addition to the text escapes) which need to be escaped inside attributes.
60+
const ATTR_ADDL: Escapes = [
61+
[new RegExp('"', 'g'), '&quot;'],
62+
[new RegExp("'", 'g'), '&apos;'],
63+
];
64+
65+
function escapeText(set: Escapes, what: string | null): string {
66+
if (!what) { return ''; }
67+
68+
for (const [re, repl] of set) {
69+
what = what.replace(re, repl);
70+
}
71+
72+
return what;
73+
}

packages/jsii-rosetta/lib/markdown/javadoc-renderer.ts

Lines changed: 8 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,9 @@
11
import * as cm from 'commonmark';
22
import { RendererContext } from './markdown';
33
import { MarkdownRenderer, collapsePara, para, stripTrailingWhitespace, stripPara } from './markdown-renderer';
4+
import { makeJavaEscaper } from './escapes';
5+
6+
const ESCAPE = makeJavaEscaper();
47

58
/* eslint-disable @typescript-eslint/camelcase */
69

@@ -16,7 +19,7 @@ export class JavaDocRenderer extends MarkdownRenderer {
1619
}
1720

1821
public code(node: cm.Node, _context: RendererContext) {
19-
return `<code>${escapeCharacters(node.literal)}</code>`;
22+
return `<code>${ESCAPE.text(node.literal)}</code>`;
2023
}
2124

2225
/**
@@ -30,15 +33,15 @@ export class JavaDocRenderer extends MarkdownRenderer {
3033
*/
3134
/* eslint-disable-next-line @typescript-eslint/camelcase */
3235
public code_block(node: cm.Node, _context: RendererContext) {
33-
return para(`<blockquote><pre>\n${escapeCharacters(node.literal)}</pre></blockquote>`);
36+
return para(`<blockquote><pre>\n${ESCAPE.text(node.literal)}</pre></blockquote>`);
3437
}
3538

3639
public text(node: cm.Node, _context: RendererContext) {
37-
return escapeCharacters(node.literal) || '';
40+
return ESCAPE.text(node.literal) ?? '';
3841
}
3942

4043
public link(node: cm.Node, context: RendererContext) {
41-
return `<a href="${node.destination || ''}">${context.content()}</a>`;
44+
return `<a href="${ESCAPE.attribute(node.destination) ?? ''}">${context.content()}</a>`;
4245
}
4346

4447
public document(_node: cm.Node, context: RendererContext) {
@@ -60,7 +63,7 @@ export class JavaDocRenderer extends MarkdownRenderer {
6063
}
6164

6265
public image(node: cm.Node, context: RendererContext) {
63-
return `<img alt="${context.content()}" src="${node.destination || ''}">`;
66+
return `<img alt="${ESCAPE.text2attr(context.content())}" src="${ESCAPE.attribute(node.destination) ?? ''}">`;
6467
}
6568

6669
public emph(_node: cm.Node, context: RendererContext) {
@@ -77,13 +80,6 @@ export class JavaDocRenderer extends MarkdownRenderer {
7780
}
7881
}
7982

80-
/**
81-
* Escape the characters that need escaping in JavaDoc HTML
82-
*/
83-
function escapeCharacters(x: string | null): string {
84-
return x ? x.replace(/&/g, '&amp;').replace(/</g, '&lt;').replace(/>/g, '&gt;').replace(/@/g, '&#64;') : '';
85-
}
86-
8783
function collapseParaJava(x: string) {
8884
return collapsePara(x, '\n<p>\n');
8985
}

packages/jsii-rosetta/lib/markdown/xml-comment-renderer.ts

Lines changed: 40 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,13 @@
11
import * as cm from 'commonmark';
22
import { prefixLines, RendererContext } from './markdown';
33
import { MarkdownRenderer, para, stripPara } from './markdown-renderer';
4+
import { makeXmlEscaper } from './escapes';
5+
6+
const ESCAPE = makeXmlEscaper();
7+
8+
// The types for 'xmldom' are not complete.
9+
/* eslint-disable-next-line @typescript-eslint/no-var-requires,@typescript-eslint/no-require-imports */
10+
const { DOMParser, XMLSerializer } = require('xmldom');
411

512
/* eslint-disable @typescript-eslint/camelcase */
613

@@ -16,19 +23,23 @@ export class CSharpXmlCommentRenderer extends MarkdownRenderer {
1623
}
1724

1825
public code(node: cm.Node, _context: RendererContext) {
19-
return `<c>${escapeCharacters(node.literal)}</c>`;
26+
return `<c>${ESCAPE.text(node.literal)}</c>`;
2027
}
2128

2229
public code_block(node: cm.Node, _context: RendererContext) {
2330
return para(`<code><![CDATA[\n${node.literal}]]></code>`);
2431
}
2532

2633
public text(node: cm.Node, _context: RendererContext) {
27-
return escapeCharacters(node.literal) || '';
34+
return ESCAPE.text(node.literal) ?? '';
2835
}
2936

3037
public link(node: cm.Node, context: RendererContext) {
31-
return `${context.content()} (${node.destination || ''})`;
38+
return `<a href="${ESCAPE.attribute(node.destination) ?? ''}">${context.content()}</a>`;
39+
}
40+
41+
public image(node: cm.Node, context: RendererContext) {
42+
return `<img alt="${ESCAPE.text2attr(context.content())}" src="${ESCAPE.attribute(node.destination) ?? ''}" />`;
3243
}
3344

3445
public emph(_node: cm.Node, context: RendererContext) {
@@ -53,14 +64,31 @@ export class CSharpXmlCommentRenderer extends MarkdownRenderer {
5364
return `<description>${stripPara(context.content())}</description>\n`;
5465
}
5566

56-
public image(node: cm.Node, context: RendererContext) {
57-
return `<img alt="${context.content()}" src="${node.destination || ''}">`;
67+
public thematic_break(_node: cm.Node, _context: RendererContext) {
68+
return para('<hr />');
5869
}
59-
}
6070

61-
/**
62-
* Escape the characters that need escaping in XML
63-
*/
64-
function escapeCharacters(x: string | null): string {
65-
return x ? x.replace(/&/g, '&amp;').replace(/</g, '&lt;').replace(/>/g, '&gt;') : '';
66-
}
71+
/**
72+
* HTML needs to be converted to XML
73+
*
74+
* If we don't do this, the parser will reject the whole XML block once it seens an unclosed
75+
* <img> tag.
76+
*/
77+
public html_inline(node: cm.Node, _context: RendererContext) {
78+
const html = node.literal ?? '';
79+
const doc = new DOMParser().parseFromString(html, 'text/html');
80+
return new XMLSerializer().serializeToString(doc);
81+
}
82+
83+
/**
84+
* HTML needs to be converted to XML
85+
*
86+
* If we don't do this, the parser will reject the whole XML block once it seens an unclosed
87+
* <img> tag.
88+
*/
89+
public html_block(node: cm.Node, _context: RendererContext) {
90+
const html = node.literal ?? '';
91+
const doc = new DOMParser().parseFromString(html, 'text/html');
92+
return new XMLSerializer().serializeToString(doc);
93+
}
94+
}

packages/jsii-rosetta/package.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@
3434
"commonmark": "^0.29.0",
3535
"fs-extra": "^8.1.0",
3636
"typescript": "~3.7.4",
37+
"xmldom": "^0.2.1",
3738
"yargs": "^15.1.0"
3839
},
3940
"jest": {

packages/jsii-rosetta/test/markdown/javadoc.test.ts

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,19 @@ if (x &lt; 3) {
7676
`);
7777
});
7878

79+
test('quotes are escaped inside attributes', () => {
80+
expectOutput(`
81+
['tis but a "scratch"](http://bla.ck/"kni"gh&t)
82+
83+
![nay merely a "flesh wound" &cet](http://bla.ck/"kni"gh&t.jpg)
84+
`, `
85+
<a href="http://bla.ck/%22kni%22gh&amp;t">'tis but a "scratch"</a>
86+
<p>
87+
<img alt="nay merely a &quot;flesh wound&quot; &amp;cet" src="http://bla.ck/%22kni%22gh&amp;t.jpg">
88+
`);
89+
});
90+
91+
7992
function expectOutput(source: string, expected: string) {
8093
if (DEBUG) {
8194
// tslint:disable-next-line:no-console

packages/jsii-rosetta/test/markdown/xmldoccomments.test.ts

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,43 @@ if (x < 3) {
4545
`);
4646
});
4747

48+
test('quotes are escaped inside attributes', () => {
49+
expectOutput(`
50+
['tis but a "scratch"](http://bla.ck/"kni"gh&t)
51+
52+
![nay merely a "flesh wound" &cet](http://bla.ck/"kni"gh&t.jpg)
53+
`, `
54+
<a href="http://bla.ck/%22kni%22gh&amp;t">'tis but a "scratch"</a>
55+
56+
<img alt="nay merely a &quot;flesh wound&quot; &amp;cet" src="http://bla.ck/%22kni%22gh&amp;t.jpg" />
57+
`);
58+
});
59+
60+
61+
test('convert header properly', () => {
62+
expectOutput(`
63+
<!--BEGIN STABILITY BANNER-->
64+
65+
---
66+
67+
![Stability: Stable](https://img.shields.io/badge/stability-Stable-success.svg?style=for-the-badge)
68+
69+
---
70+
<!--END STABILITY BANNER-->
71+
`, `
72+
<!--BEGIN STABILITY BANNER-->
73+
74+
<hr />
75+
76+
<img alt="Stability: Stable" src="https://img.shields.io/badge/stability-Stable-success.svg?style=for-the-badge" />
77+
78+
<hr />
79+
80+
<!--END STABILITY BANNER-->
81+
`);
82+
});
83+
84+
4885
function expectOutput(source: string, expected: string) {
4986
if (DEBUG) {
5087
// tslint:disable-next-line:no-console

yarn.lock

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1328,6 +1328,11 @@
13281328
dependencies:
13291329
"@types/node" "*"
13301330

1331+
"@types/xmldom@^0.1.29":
1332+
version "0.1.29"
1333+
resolved "https://registry.yarnpkg.com/@types/xmldom/-/xmldom-0.1.29.tgz#c4428b0ca86d3b881475726fd94980b38a27c381"
1334+
integrity sha1-xEKLDKhtO4gUdXJv2UmAs4onw4E=
1335+
13311336
"@types/yargs-parser@*":
13321337
version "13.1.0"
13331338
resolved "https://registry.yarnpkg.com/@types/yargs-parser/-/yargs-parser-13.1.0.tgz#c563aa192f39350a1d18da36c5a8da382bbd8228"
@@ -8543,6 +8548,11 @@ xmlbuilder@^13.0.2:
85438548
resolved "https://registry.yarnpkg.com/xmlbuilder/-/xmlbuilder-13.0.2.tgz#02ae33614b6a047d1c32b5389c1fdacb2bce47a7"
85448549
integrity sha512-Eux0i2QdDYKbdbA6AM6xE4m6ZTZr4G4xF9kahI2ukSEMCzwce2eX9WlTI5J3s+NU7hpasFsr8hWIONae7LluAQ==
85458550

8551+
xmldom@^0.2.1:
8552+
version "0.2.1"
8553+
resolved "https://registry.yarnpkg.com/xmldom/-/xmldom-0.2.1.tgz#cac9465066f161e1c3302793ea4dbe59c518274f"
8554+
integrity sha512-kXXiYvmblIgEemGeB75y97FyaZavx6SQhGppLw5TKWAD2Wd0KAly0g23eVLh17YcpxZpnFym1Qk/eaRjy1APPg==
8555+
85468556
xtend@^4.0.0, xtend@~4.0.1:
85478557
version "4.0.2"
85488558
resolved "https://registry.yarnpkg.com/xtend/-/xtend-4.0.2.tgz#bb72779f5fa465186b1f438f674fa347fdb5db54"

0 commit comments

Comments
 (0)