parseHTML.ts — astro Source File
Architecture documentation for parseHTML.ts, a typescript file in the astro codebase. 5 imports, 0 dependents.
Entity Profile
Dependency Diagram
graph LR b9e6df3b_0db3_5da0_196d_ad954be5a3bd["parseHTML.ts"] 839dada7_8bd4_2ee4_9498_f38d21d2f7e9["utils.ts"] b9e6df3b_0db3_5da0_196d_ad954be5a3bd --> 839dada7_8bd4_2ee4_9498_f38d21d2f7e9 5ecba48f_d9f9_6a3a_2e04_bda4d9c652a4["isInsideExpression"] b9e6df3b_0db3_5da0_196d_ad954be5a3bd --> 5ecba48f_d9f9_6a3a_2e04_bda4d9c652a4 040ca79b_dadf_4383_efd2_c0b13744e9f1["language-core"] b9e6df3b_0db3_5da0_196d_ad954be5a3bd --> 040ca79b_dadf_4383_efd2_c0b13744e9f1 41525615_7e06_b0e8_f601_674c57b118ee["typescript"] b9e6df3b_0db3_5da0_196d_ad954be5a3bd --> 41525615_7e06_b0e8_f601_674c57b118ee 4e2ee814_ff7b_a348_0e3a_6e6d7b34afb6["vscode-html-languageservice"] b9e6df3b_0db3_5da0_196d_ad954be5a3bd --> 4e2ee814_ff7b_a348_0e3a_6e6d7b34afb6 style b9e6df3b_0db3_5da0_196d_ad954be5a3bd fill:#6366f1,stroke:#818cf8,color:#fff
Relationship Graph
Source Code
import type { VirtualCode } from '@volar/language-core';
import type ts from 'typescript';
import * as html from 'vscode-html-languageservice';
import { isInsideExpression } from '../plugins/utils';
// Single HTML language service instance, created once at module load and shared by
// the scanner factory and the document parser defined in this file.
const htmlLs = html.getLanguageService();
/**
 * Produces the HTML view of a document snapshot.
 *
 * The snapshot's full text is run through `preprocessHTML`, and the resulting
 * string is used to build both the HTML virtual code and the parsed HTML document.
 *
 * @param snapshot - Snapshot whose entire text is read.
 * @param frontmatterEnd - Forwarded to `preprocessHTML` as the offset up to which the text is blanked.
 * @returns The virtual code and the parsed HTML document, both built from the same preprocessed text.
 */
export function parseHTML(
	snapshot: ts.IScriptSnapshot,
	frontmatterEnd: number,
): { virtualCode: VirtualCode; htmlDocument: html.HTMLDocument } {
	const sourceText = snapshot.getText(0, snapshot.getLength());
	const preprocessed = preprocessHTML(sourceText, frontmatterEnd);

	const virtualCode = getHTMLVirtualCode(preprocessed);
	const htmlDocument = getHTMLDocument(preprocessed);

	return { virtualCode, htmlDocument };
}
/**
 * Call signature under which the scanner factory is used in this file, including
 * the optional `initialState` argument.
 */
type ScannerFactory = (
	input: string,
	initialOffset?: number,
	initialState?: html.ScannerState,
) => html.Scanner;

// The language service's `createScanner`, re-typed through an assertion so that an
// initial scanner state can be passed (presumably the declared typing does not
// expose that third argument; verify against the library's typings).
const createScanner = htmlLs.createScanner as ScannerFactory;
/**
 * Scan the text and blank out any `>` or `<` that would cause a tag to end short,
 * e.g. the `<` in `<Foo checked={a < 1}>`.
 *
 * Text before `frontmatterEnd` is replaced with spaces first. Every replacement made
 * by this function swaps exactly one character for one space, so the returned string
 * has the same length as `text` and offsets stay aligned with the input.
 *
 * @param text - Full source text to preprocess.
 * @param frontmatterEnd - Offset up to which the text is blanked. When omitted, nothing
 *   is blanked up front.
 * @returns The preprocessed text, same length as `text`.
 */
export function preprocessHTML(text: string, frontmatterEnd?: number) {
	// `Array.prototype.fill` reads an `undefined` end as "up to the array's length", so
	// forwarding an omitted `frontmatterEnd` unchanged would blank the whole document.
	// Default it to 0 so that omitting the argument blanks nothing.
	let content = text
		.split('')
		.fill(' ', 0, frontmatterEnd ?? 0)
		.join('');

	let scanner = createScanner(content);
	let token = scanner.scan();
	// Offset of the `<` that opened the start tag currently being scanned, or null
	// when no start tag is being tracked.
	let currentStartTagStart: number | null = null;

	while (token !== html.TokenType.EOS) {
		const offset = scanner.getTokenOffset();

		if (token === html.TokenType.StartTagOpen) {
			currentStartTagStart = offset;
		}

		if (token === html.TokenType.StartTagClose) {
			if (shouldBlankStartOrEndTagLike(offset)) {
				// This `>` is reported as being inside an expression, so it is not the real
				// end of the tag: blank it and keep tracking the same start tag.
				blankStartOrEndTagLike(offset);
			} else {
				currentStartTagStart = null;
			}
		}

		if (token === html.TokenType.StartTagSelfClose) {
			currentStartTagStart = null;
		}

		// <Foo checked={a < 1}>
		// https://github.com/microsoft/vscode-html-languageservice/blob/71806ef57be07e1068ee40900ef8b0899c80e68a/src/parser/htmlScanner.ts#L327
		if (
			token === html.TokenType.Unknown &&
			scanner.getScannerState() === html.ScannerState.WithinTag &&
			scanner.getTokenText() === '<' &&
			shouldBlankStartOrEndTagLike(offset)
		) {
			blankStartOrEndTagLike(offset);
		}

		// TODO: Handle TypeScript generics inside expressions / Use the compiler to parse HTML instead?

		token = scanner.scan();
	}

	return content;

	// True when a start tag is being tracked and `isInsideExpression` reports that
	// `offset` lies inside an expression within that tag.
	function shouldBlankStartOrEndTagLike(offset: number) {
		// not null rather than falsy, otherwise it won't work on first tag(0)
		return (
			currentStartTagStart !== null && isInsideExpression(content, currentStartTagStart, offset)
		);
	}

	// Replaces the single character at `offset` with a space, then restarts the scanner
	// at that offset (in `state`, defaulting to WithinTag) so scanning continues over
	// the updated content.
	function blankStartOrEndTagLike(offset: number, state?: html.ScannerState) {
		content = content.substring(0, offset) + ' ' + content.substring(offset + 1);
		scanner = createScanner(content, offset, state ?? html.ScannerState.WithinTag);
	}
}
/**
 * Wraps preprocessed HTML text in a `VirtualCode` object with id and language `html`.
 *
 * The snapshot serves substrings of the given text and never reports a change range.
 * A single mapping covers the whole text, starting at offset 0 on both the source and
 * generated side, with every feature flag enabled except `format`.
 *
 * @param preprocessedHTML - Text to expose through the virtual code's snapshot.
 * @returns The virtual code, with no embedded codes.
 */
function getHTMLVirtualCode(preprocessedHTML: string): VirtualCode {
	const totalLength = preprocessedHTML.length;

	const snapshot: VirtualCode['snapshot'] = {
		getText: (start, end) => preprocessedHTML.substring(start, end),
		getLength: () => totalLength,
		getChangeRange: () => undefined,
	};

	// One mapping spanning the entire text, starting at offset 0 on both sides.
	const wholeDocumentMapping: VirtualCode['mappings'][number] = {
		sourceOffsets: [0],
		generatedOffsets: [0],
		lengths: [totalLength],
		data: {
			verification: true,
			completion: true,
			semantic: true,
			navigation: true,
			structure: true,
			format: false,
		},
	};

	return {
		id: `html`,
		languageId: 'html',
		snapshot,
		mappings: [wholeDocumentMapping],
		embeddedCodes: [],
	};
}
/**
 * Parses preprocessed HTML text into an HTML document using the shared language service.
 *
 * @param preprocessedHTML - Text to parse.
 * @returns The document produced by `parseHTMLDocument`.
 */
function getHTMLDocument(preprocessedHTML: string): html.HTMLDocument {
	// Stand-in document exposing only `getText`; cast to `any` because it is not a
	// full text document (presumably the parser only reads the text; verify against
	// the library).
	const textOnlyDocument = { getText: () => preprocessedHTML } as any;
	return htmlLs.parseHTMLDocument(textOnlyDocument);
}
Domain
Subdomains
Dependencies
- isInsideExpression
- language-core
- typescript
- utils.ts
- vscode-html-languageservice
Source
Frequently Asked Questions
What does parseHTML.ts do?
parseHTML.ts is a source file in the astro codebase, written in TypeScript. It belongs to the CoreAstro domain, CoreMiddleware subdomain.
What functions are defined in parseHTML.ts?
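parseHTML.ts defines 4 top-level functions: getHTMLDocument, getHTMLVirtualCode, parseHTML, and preprocessHTML (which contains two nested helpers, shouldBlankStartOrEndTagLike and blankStartOrEndTagLike). The name "input" is not a function; it is the first parameter in the type signature of the createScanner constant.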
What does parseHTML.ts depend on?
parseHTML.ts imports 5 module(s): isInsideExpression, language-core, typescript, utils.ts, vscode-html-languageservice.
Where is parseHTML.ts in the architecture?
parseHTML.ts is located at packages/language-tools/language-server/src/core/parseHTML.ts (domain: CoreAstro, subdomain: CoreMiddleware, directory: packages/language-tools/language-server/src/core).
Analyze Your Own Codebase
Get architecture documentation, dependency graphs, and domain analysis for your codebase in minutes.
Try Supermodel Free