All files / src html-tree-validation.js

99.06% Statements 213/215
83.63% Branches 46/55
100% Functions 3/3
99.06% Lines 213/215

Press n or j to go to the next uncovered block, b, p or k for the previous block.

1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 2162x 2x 2x 2x 2x 2x 2x 2x 2x 2x 2x 2x 2x 2x 2x 2x 2x 2x 2x 2x 2x 2x 2x 2x 2x 2x 2x 2x 2x 2x 2x 2x 2x 2x 2x 2x 2x 2x 2x 2x 2x 2x 2x 2x 2x 2x 2x 2x 2x 2x 2x 2x 2x 2x 2x 2x 2x 2x 2x 2x 2x 2x 2x 1430x 1430x 113x 113x 113x 113x 4x 4x 113x 1426x 1426x 2x 2x 2x 2x 2x 2x 2x 2x 2x 2x 2x 2x 2x 2x 2x 2x 2x 2x 2x 2x 2x 2x 2x 2x 2x 2x 2x 2x 2x 2x 2x 2x 2x 2x 2x 2x 2x 2x 2x 2x 2x 2x 2x 2x 2x 2x 2x 2x 2x 2x 2x 2x 2x 2x 2x 2x 2x 2x 2x 2x 2x 2x 2x 2x 2x 2x 2x 2x 2x 2x 218x 217x 217x 217x 218x 45x 218x 5x 5x 5x 3x 3x 5x 2x 2x 5x 1x 41x 218x 218x 2x 2x 2x 2x 2x 2x 2x 2x 2x 10100x 10083x 10089x 9938x 9938x 9938x 6598x     6598x 4x 4x 6598x 416x 416x 6598x 9938x 9663x 9663x 10100x 10100x 10100x 10100x 10100x 10100x 10100x 10100x 10100x 10100x 10100x 10100x 10100x 10100x 2x 2x 2x 2x 2x 10100x 9661x 9661x 9661x  
/**
 * Map of elements that have certain elements that are not allowed inside them, in the sense that they will auto-close the parent/ancestor element.
 * Theoretically one could take advantage of it but most of the time it will just result in confusing behavior and break when SSR'd.
 * There are more elements that are invalid inside other elements, but they're not auto-closed and so don't break SSR and are therefore not listed here.
 * @type {Record<string, { direct: string[]} | { descendant: string[]; reset_by?: string[] }>}
 */
const autoclosing_children = {
	// based on http://developers.whatwg.org/syntax.html#syntax-tag-omission
	li: { direct: ['li'] },
	// https://developer.mozilla.org/en-US/docs/Web/HTML/Element/dt#technical_summary
	dt: { descendant: ['dt', 'dd'], reset_by: ['dl'] },
	dd: { descendant: ['dt', 'dd'], reset_by: ['dl'] },
	p: {
		descendant: [
			'address',
			'article',
			'aside',
			'blockquote',
			'div',
			'dl',
			'fieldset',
			'footer',
			'form',
			'h1',
			'h2',
			'h3',
			'h4',
			'h5',
			'h6',
			'header',
			'hgroup',
			'hr',
			'main',
			'menu',
			'nav',
			'ol',
			'p',
			'pre',
			'section',
			'table',
			'ul'
		]
	},
	rt: { descendant: ['rt', 'rp'] },
	rp: { descendant: ['rt', 'rp'] },
	optgroup: { descendant: ['optgroup'] },
	option: { descendant: ['option', 'optgroup'] },
	thead: { direct: ['tbody', 'tfoot'] },
	tbody: { direct: ['tbody', 'tfoot'] },
	tfoot: { direct: ['tbody'] },
	tr: { direct: ['tr', 'tbody'] },
	td: { direct: ['td', 'th', 'tr'] },
	th: { direct: ['td', 'th', 'tr'] }
};
 
/**
 * Returns true if the tag is either the last in the list of siblings and will be autoclosed,
 * or not allowed inside the parent tag such that it will auto-close it. The latter results
 * in the browser repairing the HTML, which will likely result in an error during hydration.
 * @param {string} current
 * @param {string} [next]
 */
export function closing_tag_omitted(current, next) {
	const disallowed = autoclosing_children[current];
	if (disallowed) {
		if (
			!next ||
			('direct' in disallowed ? disallowed.direct : disallowed.descendant).includes(next)
		) {
			return true;
		}
	}
	return false;
}
 
/**
 * Map of elements that have certain elements that are not allowed inside them, in the sense that the browser will somehow repair the HTML.
 * There are more elements that are invalid inside other elements, but they're not repaired and so don't break SSR and are therefore not listed here.
 * @type {Record<string, { direct: string[]} | { descendant: string[]; reset_by?: string[]; only?: string[] } | { only: string[] }>}
 */
const disallowed_children = {
	...autoclosing_children,
	optgroup: { only: ['option', '#text'] },
	// Strictly speaking, seeing an <option> doesn't mean we're in a <select>, but we assume it here
	option: { only: ['#text'] },
	form: { descendant: ['form'] },
	a: { descendant: ['a'] },
	button: { descendant: ['button'] },
	h1: { descendant: ['h1', 'h2', 'h3', 'h4', 'h5', 'h6'] },
	h2: { descendant: ['h1', 'h2', 'h3', 'h4', 'h5', 'h6'] },
	h3: { descendant: ['h1', 'h2', 'h3', 'h4', 'h5', 'h6'] },
	h4: { descendant: ['h1', 'h2', 'h3', 'h4', 'h5', 'h6'] },
	h5: { descendant: ['h1', 'h2', 'h3', 'h4', 'h5', 'h6'] },
	h6: { descendant: ['h1', 'h2', 'h3', 'h4', 'h5', 'h6'] },
	// https://html.spec.whatwg.org/multipage/syntax.html#parsing-main-inselect
	select: { only: ['option', 'optgroup', '#text', 'hr', 'script', 'template'] },
 
	// https://html.spec.whatwg.org/multipage/syntax.html#parsing-main-intd
	// https://html.spec.whatwg.org/multipage/syntax.html#parsing-main-incaption
	// No special behavior since these rules fall back to "in body" mode for
	// all except special table nodes which cause bad parsing behavior anyway.
 
	// https://html.spec.whatwg.org/multipage/syntax.html#parsing-main-intd
	tr: { only: ['th', 'td', 'style', 'script', 'template'] },
	// https://html.spec.whatwg.org/multipage/syntax.html#parsing-main-intbody
	tbody: { only: ['tr', 'style', 'script', 'template'] },
	thead: { only: ['tr', 'style', 'script', 'template'] },
	tfoot: { only: ['tr', 'style', 'script', 'template'] },
	// https://html.spec.whatwg.org/multipage/syntax.html#parsing-main-incolgroup
	colgroup: { only: ['col', 'template'] },
	// https://html.spec.whatwg.org/multipage/syntax.html#parsing-main-intable
	table: {
		only: ['caption', 'colgroup', 'tbody', 'thead', 'tfoot', 'style', 'script', 'template']
	},
	// https://html.spec.whatwg.org/multipage/syntax.html#parsing-main-inhead
	head: {
		only: [
			'base',
			'basefont',
			'bgsound',
			'link',
			'meta',
			'title',
			'noscript',
			'noframes',
			'style',
			'script',
			'template'
		]
	},
	// https://html.spec.whatwg.org/multipage/semantics.html#the-html-element
	html: { only: ['head', 'body', 'frameset'] },
	frameset: { only: ['frame'] },
	'#document': { only: ['html'] }
};
 
/**
 * Returns false if the tag is not allowed inside the ancestor tag (which is grandparent and above) such that it will result
 * in the browser repairing the HTML, which will likely result in an error during hydration.
 * @param {string} tag
 * @param {string[]} ancestors All nodes starting with the parent, up until the ancestor, which means two entries minimum
 * @returns {boolean}
 */
export function is_tag_valid_with_ancestor(tag, ancestors) {
	if (tag.includes('-')) return true; // custom elements can be anything
 
	const target = ancestors[ancestors.length - 1];
	const disallowed = disallowed_children[target];
	if (!disallowed) return true;
 
	if ('reset_by' in disallowed && disallowed.reset_by) {
		for (let i = ancestors.length - 2; i >= 0; i--) {
			const ancestor = ancestors[i];
			if (ancestor.includes('-')) return true; // custom elements can be anything
 
			// A reset means that forbidden descendants are allowed again
			if (disallowed.reset_by.includes(ancestors[i])) {
				return true;
			}
		}
	}
 
	return 'descendant' in disallowed ? !disallowed.descendant.includes(tag) : true;
}
 
/**
 * Returns false if the tag is not allowed inside the parent tag such that it will result
 * in the browser repairing the HTML, which will likely result in an error during hydration.
 * @param {string} tag
 * @param {string | null} parent_tag
 * @returns {boolean}
 */
export function is_tag_valid_with_parent(tag, parent_tag) {
	if (tag.includes('-') || parent_tag?.includes('-')) return true; // custom elements can be anything
 
	if (parent_tag !== null) {
		const disallowed = disallowed_children[parent_tag];
 
		if (disallowed) {
			if ('direct' in disallowed && disallowed.direct.includes(tag)) {
				return false;
			}
			if ('descendant' in disallowed && disallowed.descendant.includes(tag)) {
				return false;
			}
			if ('only' in disallowed && disallowed.only) {
				return disallowed.only.includes(tag);
			}
		}
	}
 
	switch (tag) {
		case 'body':
		case 'caption':
		case 'col':
		case 'colgroup':
		case 'frameset':
		case 'frame':
		case 'head':
		case 'html':
		case 'tbody':
		case 'td':
		case 'tfoot':
		case 'th':
		case 'thead':
		case 'tr':
			// These tags are only valid with a few parents that have special child
			// parsing rules - if we're down here, then none of those matched and
			// so we allow it only if we don't know what the parent is, as all other
			// cases are invalid (and we only get into this function if we know the parent).
			return false;
	}
 
	return true;
}