Home / File / parseHTML.ts — astro Source File

parseHTML.ts — astro Source File

Architecture documentation for parseHTML.ts, a TypeScript file in the astro codebase. 5 imports, 0 dependents.

File typescript CoreAstro CoreMiddleware 5 imports 5 functions

Entity Profile

Dependency Diagram

graph LR
  b9e6df3b_0db3_5da0_196d_ad954be5a3bd["parseHTML.ts"]
  839dada7_8bd4_2ee4_9498_f38d21d2f7e9["utils.ts"]
  b9e6df3b_0db3_5da0_196d_ad954be5a3bd --> 839dada7_8bd4_2ee4_9498_f38d21d2f7e9
  5ecba48f_d9f9_6a3a_2e04_bda4d9c652a4["isInsideExpression"]
  b9e6df3b_0db3_5da0_196d_ad954be5a3bd --> 5ecba48f_d9f9_6a3a_2e04_bda4d9c652a4
  040ca79b_dadf_4383_efd2_c0b13744e9f1["language-core"]
  b9e6df3b_0db3_5da0_196d_ad954be5a3bd --> 040ca79b_dadf_4383_efd2_c0b13744e9f1
  41525615_7e06_b0e8_f601_674c57b118ee["typescript"]
  b9e6df3b_0db3_5da0_196d_ad954be5a3bd --> 41525615_7e06_b0e8_f601_674c57b118ee
  4e2ee814_ff7b_a348_0e3a_6e6d7b34afb6["vscode-html-languageservice"]
  b9e6df3b_0db3_5da0_196d_ad954be5a3bd --> 4e2ee814_ff7b_a348_0e3a_6e6d7b34afb6
  style b9e6df3b_0db3_5da0_196d_ad954be5a3bd fill:#6366f1,stroke:#818cf8,color:#fff

Relationship Graph

Source Code

import type { VirtualCode } from '@volar/language-core';
import type ts from 'typescript';
import * as html from 'vscode-html-languageservice';
import { isInsideExpression } from '../plugins/utils';

// Single HTML language service instance, created once when this module loads.
// It supplies both the scanner factory and the HTML document parser used in this file.
const htmlLs = html.getLanguageService();

/**
 * Builds the HTML view of a file from its script snapshot.
 *
 * The full snapshot text is run through `preprocessHTML` (with the given
 * `frontmatterEnd`), and the resulting string is used to produce both the
 * HTML virtual code and the parsed HTML document.
 *
 * @param snapshot Snapshot holding the complete source text.
 * @param frontmatterEnd Offset forwarded to `preprocessHTML`.
 * @returns The HTML virtual code and the parsed HTML document, both derived
 * from the same preprocessed text.
 */
export function parseHTML(
	snapshot: ts.IScriptSnapshot,
	frontmatterEnd: number,
): { virtualCode: VirtualCode; htmlDocument: html.HTMLDocument } {
	const sourceText = snapshot.getText(0, snapshot.getLength());
	const preprocessed = preprocessHTML(sourceText, frontmatterEnd);

	const virtualCode = getHTMLVirtualCode(preprocessed);
	const htmlDocument = getHTMLDocument(preprocessed);

	return { virtualCode, htmlDocument };
}

/**
 * Call signature this file uses for the language service's scanner factory,
 * including the optional third `initialState` argument.
 */
type ScannerFactory = (
	input: string,
	initialOffset?: number,
	initialState?: html.ScannerState,
) => html.Scanner;

// NOTE(review): the cast is presumably needed because the published typing of
// `createScanner` does not expose `initialState` — confirm against the library.
const createScanner = htmlLs.createScanner as ScannerFactory;

/**
 * Scan the text and remove any `>` or `<` that cause the tag to end short.
 *
 * The first `frontmatterEnd` characters are overwritten with spaces, then the
 * remaining text is tokenized with the HTML scanner. Whenever a start-tag
 * close (`>`) or a stray `<` within a tag is reported at a position that
 * `isInsideExpression` accepts for the current start tag, that single
 * character is replaced by a space and scanning resumes from the same offset.
 * Every replacement is one character for one character, so the returned string
 * always has the same length as `text` and offsets remain valid.
 *
 * @param text Full source text.
 * @param frontmatterEnd Number of leading characters to blank out. When
 * omitted, nothing is blanked.
 * @returns The text with the leading range and the offending `<` / `>`
 * characters replaced by spaces.
 */
export function preprocessHTML(text: string, frontmatterEnd?: number) {
	// `Array.prototype.fill` treats an `undefined` end as "up to the end of the
	// array", which would blank the entire document when `frontmatterEnd` is
	// omitted. Default to 0 so that no prefix is blanked in that case.
	let content = text
		.split('')
		.fill(' ', 0, frontmatterEnd ?? 0)
		.join('');

	let scanner = createScanner(content);
	let token = scanner.scan();
	// Offset of the `<` opening the start tag currently being scanned, or null
	// when the scanner is not inside a start tag.
	let currentStartTagStart: number | null = null;

	while (token !== html.TokenType.EOS) {
		const offset = scanner.getTokenOffset();

		if (token === html.TokenType.StartTagOpen) {
			currentStartTagStart = offset;
		}

		if (token === html.TokenType.StartTagClose) {
			if (shouldBlankStartOrEndTagLike(offset)) {
				// This `>` does not really close the tag: blank it and keep the
				// current start tag open.
				blankStartOrEndTagLike(offset);
			} else {
				currentStartTagStart = null;
			}
		}

		if (token === html.TokenType.StartTagSelfClose) {
			currentStartTagStart = null;
		}

		// <Foo checked={a < 1}>
		// https://github.com/microsoft/vscode-html-languageservice/blob/71806ef57be07e1068ee40900ef8b0899c80e68a/src/parser/htmlScanner.ts#L327
		if (
			token === html.TokenType.Unknown &&
			scanner.getScannerState() === html.ScannerState.WithinTag &&
			scanner.getTokenText() === '<' &&
			shouldBlankStartOrEndTagLike(offset)
		) {
			blankStartOrEndTagLike(offset);
		}

		// TODO: Handle TypeScript generics inside expressions / Use the compiler to parse HTML instead?

		token = scanner.scan();
	}

	return content;

	/**
	 * True when a start tag is currently open and `isInsideExpression` reports
	 * `offset` as being inside an expression relative to that tag's start.
	 * NOTE(review): the exact rule lives in `isInsideExpression`; presumably it
	 * checks for an unclosed `{ ... }` — confirm against that helper.
	 */
	function shouldBlankStartOrEndTagLike(offset: number) {
		// not null rather than falsy, otherwise it won't work on first tag(0)
		return (
			currentStartTagStart !== null && isInsideExpression(content, currentStartTagStart, offset)
		);
	}

	/**
	 * Replaces the single character at `offset` with a space and recreates the
	 * scanner at that offset so scanning continues over the updated text. The
	 * scanner restarts in `state`, or in `WithinTag` when no state is given.
	 */
	function blankStartOrEndTagLike(offset: number, state?: html.ScannerState) {
		content = content.substring(0, offset) + ' ' + content.substring(offset + 1);
		scanner = createScanner(content, offset, state ?? html.ScannerState.WithinTag);
	}
}

/**
 * Wraps the preprocessed HTML text in a virtual code object with id and
 * language id `html`.
 *
 * The snapshot serves the preprocessed string directly, and a single mapping
 * covers the whole text one-to-one starting at offset 0. Every feature flag on
 * that mapping is enabled except `format`. No embedded codes are attached.
 *
 * @param preprocessedHTML Text produced by the preprocessing step.
 * @returns The virtual code describing that text.
 */
function getHTMLVirtualCode(preprocessedHTML: string): VirtualCode {
	const totalLength = preprocessedHTML.length;

	const htmlCode: VirtualCode = {
		id: 'html',
		languageId: 'html',
		snapshot: {
			getText: (start: number, end: number) => preprocessedHTML.substring(start, end),
			getLength: () => totalLength,
			getChangeRange: () => undefined,
		},
		mappings: [
			{
				// One mapping spanning the entire text, identical offsets on both sides.
				sourceOffsets: [0],
				generatedOffsets: [0],
				lengths: [totalLength],
				data: {
					verification: true,
					completion: true,
					semantic: true,
					navigation: true,
					structure: true,
					format: false,
				},
			},
		],
		embeddedCodes: [],
	};

	return htmlCode;
}

/**
 * Parses the preprocessed HTML text into an HTML document using the shared
 * language service.
 *
 * @param preprocessedHTML Text produced by the preprocessing step.
 * @returns The parsed HTML document.
 */
function getHTMLDocument(preprocessedHTML: string): html.HTMLDocument {
	// Only `getText` is supplied; the object is cast because it is not a full
	// text document.
	const documentStub = { getText: () => preprocessedHTML } as any;
	return htmlLs.parseHTMLDocument(documentStub);
}

Domain

Subdomains

Dependencies

Frequently Asked Questions

What does parseHTML.ts do?
parseHTML.ts is a source file in the astro codebase, written in TypeScript. It belongs to the CoreAstro domain, CoreMiddleware subdomain.
What functions are defined in parseHTML.ts?
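parseHTML.ts defines 4 named function(s): getHTMLDocument, getHTMLVirtualCode, parseHTML, preprocessHTML. It also declares the createScanner constant, a typed alias of the language service's scanner factory; "input" is the first parameter of that alias's signature rather than a function in its own right.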
What does parseHTML.ts depend on?
parseHTML.ts imports 5 module(s): isInsideExpression, language-core, typescript, utils.ts, vscode-html-languageservice.
Where is parseHTML.ts in the architecture?
parseHTML.ts is located at packages/language-tools/language-server/src/core/parseHTML.ts (domain: CoreAstro, subdomain: CoreMiddleware, directory: packages/language-tools/language-server/src/core).

Analyze Your Own Codebase

Get architecture documentation, dependency graphs, and domain analysis for your codebase in minutes.

Try Supermodel Free