diff --git a/src/core/registry.js b/src/core/registry.js index 724246b656..e22ffd3099 100644 --- a/src/core/registry.js +++ b/src/core/registry.js @@ -1,8 +1,8 @@ import { kebabToCamelCase } from '../shared/util.js'; -import { cloneGrammar } from '../util/extend.js'; +import { extend } from '../util/extend.js'; +import { grammarPatch } from '../util/grammar-patch.js'; import { forEach, toArray } from '../util/iterables.js'; -import { extend } from '../util/language-util.js'; -import { defineLazyProperty } from '../util/objects.js'; +import { deepClone, defineLazyProperty } from '../util/objects.js'; /** * TODO: docs @@ -221,7 +221,7 @@ export class Registry { const base = entry?.proto.base; // We need this so that any code modifying the base grammar doesn't affect other instances - const baseGrammar = base && cloneGrammar(required(base.id), base.id); + const baseGrammar = base && deepClone(required(base.id)); const requiredLanguages = toArray( /** @type {LanguageProto | LanguageProto[] | undefined} */ (entry?.proto.require) @@ -240,7 +240,7 @@ export class Registry { else { const options = { getOptionalLanguage: id => this.getLanguage(id), - extend: (id, ref) => extend(required(id), id, ref), + extend: (id, ref) => extend(required(id), ref), ...(baseGrammar && { base: baseGrammar }), ...(requiredLanguages.length && { languages }), }; @@ -249,10 +249,10 @@ export class Registry { } if (baseGrammar) { - evaluatedGrammar = extend(baseGrammar, base.id, evaluatedGrammar); + evaluatedGrammar = extend(baseGrammar, evaluatedGrammar); } - return (entry.evaluatedGrammar = evaluatedGrammar); + return (entry.evaluatedGrammar = grammarPatch(evaluatedGrammar)); } } diff --git a/src/core/tokenize/tokenize.js b/src/core/tokenize/tokenize.js index a556448758..3bf23dd423 100644 --- a/src/core/tokenize/tokenize.js +++ b/src/core/tokenize/tokenize.js @@ -42,7 +42,14 @@ export function tokenize (text, grammar) { const tokenList = new LinkedList(); tokenList.addAfter(tokenList.head, text); - _matchGrammar.call(prism, text, tokenList, grammar, tokenList.head, 0); + _matchGrammar.call( + prism, + text, + tokenList, + /** @type {GrammarTokens} */ (grammar), + tokenList.head, + 0 + ); return tokenList.toArray(); } @@ -50,5 +57,6 @@ export function tokenize (text, grammar) { /** * @typedef {import('../../types.d.ts').TokenStream} TokenStream * @typedef {import('../../types.d.ts').Grammar} Grammar + * @typedef {import('../../types.d.ts').GrammarTokens} GrammarTokens * @typedef {import('../prism.js').Prism} Prism */ diff --git a/src/languages/c.js b/src/languages/c.js index 25ec156775..9b96dcdebb 100644 --- a/src/languages/c.js +++ b/src/languages/c.js @@ -73,7 +73,7 @@ export default { /* OpenCL host API */ const extensions = getOptionalLanguage('opencl-extensions'); if (extensions) { - insertBefore(base, 'keyword', extensions); + insertBefore(base, 'keyword', /** @type {GrammarTokens} */ (extensions)); delete base['type-opencl-host-cpp']; } @@ -104,4 +104,5 @@ export default { /** * @typedef {import('../types.d.ts').GrammarToken} GrammarToken + * @typedef {import('../types.d.ts').GrammarTokens} GrammarTokens */ diff --git a/src/languages/cpp.js b/src/languages/cpp.js index 77a3bf6202..0cfe7057ed 100644 --- a/src/languages/cpp.js +++ b/src/languages/cpp.js @@ -103,7 +103,11 @@ export default { /* OpenCL host API */ const extensions = getOptionalLanguage('opencl-extensions'); if (extensions) { - insertBefore(cpp, 'keyword', extensions); + insertBefore( + cpp, + 'keyword', + /** @type {import('../types.d.ts').GrammarTokens} */ (extensions) + ); } const baseInside = { ...cpp }; diff --git a/src/languages/css.js b/src/languages/css.js index c053c21dab..c70e8c1de5 100644 --- a/src/languages/css.js +++ b/src/languages/css.js @@ -89,7 +89,11 @@ export default { const extras = getOptionalLanguage('css-extras'); if (extras) { - insertBefore(css, 'function', extras); + insertBefore( + css, + 'function', + /** @type {import('../types.d.ts').GrammarTokens} */ (extras) + ); } return css; diff --git a/src/languages/hlsl.js b/src/languages/hlsl.js index 16b90ba47f..9396a7d6e2 100644 --- a/src/languages/hlsl.js +++ b/src/languages/hlsl.js @@ -12,7 +12,9 @@ export default { // https://docs.microsoft.com/en-us/windows/win32/direct3dhlsl/dx-graphics-hlsl-appendix-keywords // https://docs.microsoft.com/en-us/windows/win32/direct3dhlsl/dx-graphics-hlsl-appendix-reserved-words 'class-name': [ - ...toArray(base['class-name']), + ...toArray( + /** @type {import('../types.d.ts').GrammarTokens} */ (base)['class-name'] + ), /\b(?:AppendStructuredBuffer|BlendState|Buffer|ByteAddressBuffer|CompileShader|ComputeShader|ConsumeStructuredBuffer|DepthStencilState|DepthStencilView|DomainShader|GeometryShader|Hullshader|InputPatch|LineStream|OutputPatch|PixelShader|PointStream|RWBuffer|RWByteAddressBuffer|RWStructuredBuffer|RWTexture(?:1D|1DArray|2D|2DArray|3D)|RasterizerState|RenderTargetView|SamplerComparisonState|SamplerState|StructuredBuffer|Texture(?:1D|1DArray|2D|2DArray|2DMS|2DMSArray|3D|Cube|CubeArray)|TriangleStream|VertexShader)\b/, ], 'keyword': [ diff --git a/src/languages/php.js b/src/languages/php.js index daadd32f68..736d85dde9 100644 --- a/src/languages/php.js +++ b/src/languages/php.js @@ -352,7 +352,7 @@ export default { const extras = getOptionalLanguage('php-extras'); if (extras) { - insertBefore(php, 'variable', extras); + insertBefore(php, 'variable', /** @type {GrammarTokens} */ (extras)); } const embedded = embeddedIn('markup'); @@ -376,4 +376,5 @@ export default { /** * @typedef {import('../types.d.ts').Grammar} Grammar + * @typedef {import('../types.d.ts').GrammarTokens} GrammarTokens */ diff --git a/src/languages/typescript.js b/src/languages/typescript.js index 69a7e721ae..206709362a 100644 --- a/src/languages/typescript.js +++ b/src/languages/typescript.js @@ -8,8 +8,7 @@ export default { require: javascript, alias: 'ts', grammar ({ extend }) { - /** @type {import('../types.d.ts').Grammar} */ - const typeInside = {}; + const typeInside = /** @type {import('../types.d.ts').Grammar} */ ({}); const typescript = extend('javascript', { 'class-name': { @@ -24,7 +23,7 @@ export default { }); typescript.keyword = [ - ...toArray(typescript.keyword), + ...toArray(/** @type {import('../types.d.ts').GrammarTokens} */ (typescript).keyword), // The keywords TypeScript adds to JavaScript /\b(?:abstract|declare|is|keyof|out|readonly|require|satisfies)\b/, diff --git a/src/types.d.ts b/src/types.d.ts index 7958dd68a5..c4feac49c1 100644 --- a/src/types.d.ts +++ b/src/types.d.ts @@ -198,7 +198,35 @@ export type GrammarSpecial = { $tokenize?: (code: string, grammar: Grammar, Prism: Prism) => TokenStream; }; -export type Grammar = GrammarTokens & GrammarSpecial; +/** + * Tokens within $insert + */ +export type InsertableToken = (RegExpLike | GrammarToken | (RegExpLike | GrammarToken)[]) & { + $before?: TokenName | TokenName[]; + $after?: TokenName | TokenName[]; +}; + +/** + * A grammar that is defined as its delta from another grammar. + */ +export type GrammarPatch = { + $insert?: Partial>; + $insertBefore?: Partial>; + $insertAfter?: Partial>; + $delete?: TokenName[]; + $merge?: Partial< + Record> & { pattern?: RegExpLike }> + >; +}; + +export interface Grammar extends GrammarSpecial, GrammarPatch { + [token: string]: + | RegExpLike + | GrammarToken + | (RegExpLike | GrammarToken)[] + | GrammarSpecial[keyof GrammarSpecial] + | GrammarPatch[keyof GrammarPatch]; +} export interface PlainObject { [key: string]: unknown; diff --git a/src/util/extend.js b/src/util/extend.js index fb369f2635..3412d83689 100644 --- a/src/util/extend.js +++ b/src/util/extend.js @@ -1,3 +1,5 @@ +import { betterAssign, deepClone } from './objects.js'; + /** * Creates a deep copy of the language with the given id and appends the given tokens. * @@ -13,9 +15,8 @@ * Therefore, it is encouraged to order overwriting tokens according to the positions of the overwritten tokens. * Furthermore, all non-overwriting tokens should be placed after the overwriting ones. * - * @param {Grammar} grammar The grammar of the language to extend. - * @param {string} id The id of the language to extend. - * @param {Grammar} reDef The new tokens to append. + * @param {Grammar} base The grammar of the language to extend. + * @param {Grammar} grammar The new tokens to append. * @returns {Grammar} The new language created. * @example * Prism.languages['css-with-colors'] = Prism.languages.extend('css', { @@ -26,120 +27,67 @@ * 'color': /\b(?:red|green|blue)\b/ * }); */ -export function extend (grammar, id, reDef) { - const lang = cloneGrammar(grammar, id); +export function extend (base, grammar) { + const lang = deepClone(base); + + for (const key in grammar) { + if (typeof key !== 'string' || key.startsWith('$')) { + // ignore special keys + continue; + } - for (const key in reDef) { - lang[key] = reDef[key]; + lang[key] = grammar[key]; } - return lang; -} + if (grammar.$insertBefore) { + lang.$insertBefore = betterAssign(lang.$insertBefore ?? {}, grammar.$insertBefore); + } -/** - * @param {Grammar} grammar - * @param {string} id - * @returns {Grammar} - */ -export function cloneGrammar (grammar, id) { - /** @type {Grammar} */ - const result = {}; + if (grammar.$insertAfter) { + lang.$insertAfter = betterAssign(lang.$insertAfter ?? {}, grammar.$insertAfter); + } - /** @type {Map} */ - const visited = new Map(); + if (grammar.$insert) { + // Syntactic sugar for $insertBefore/$insertAfter + for (const tokenName in grammar.$insert) { + const def = grammar.$insert[tokenName]; + const { $before, $after, ...token } = def; + const relToken = $before || $after; + const all = $before ? '$insertBefore' : '$insertAfter'; + lang[all] ??= {}; - /** - * @param {GrammarToken | RegExpLike} value - */ - function cloneToken (value) { - if (!value.pattern) { - return value; - } - else { - /** @type {GrammarToken} */ - const copy = { pattern: value.pattern }; - if (value.lookbehind) { - copy.lookbehind = value.lookbehind; + if (Array.isArray(relToken)) { + // Insert in multiple places + for (const t of relToken) { + lang[all][t][tokenName] = token; + } } - if (value.greedy) { - copy.greedy = value.greedy; + else if (relToken) { + (lang[all][relToken] ??= {})[tokenName] = token; } - if (value.alias) { - copy.alias = Array.isArray(value.alias) ? [...value.alias] : value.alias; + else { + lang[tokenName] = token; } - if (value.inside) { - copy.inside = cloneRef(value.inside); - } - return copy; } } - /** - * @param {GrammarTokens['string']} value - */ - function cloneTokens (value) { - if (!value) { - return undefined; - } - else if (Array.isArray(value)) { - return value.map(cloneToken); + if (grammar.$delete) { + if (lang.$delete) { + // base also had $delete + lang.$delete.push(...grammar.$delete); } else { - return cloneToken(value); + lang.$delete = [...grammar.$delete]; } } - /** - * @param {string | Grammar} ref - */ - function cloneRef (ref) { - if (ref === id) { - // self ref - return result; - } - else if (typeof ref === 'string') { - return ref; - } - else { - return clone(ref); - } + if (grammar.$merge) { + lang.$merge = betterAssign(lang.$merge ?? {}, grammar.$merge); } - /** - * @param {Grammar} value - */ - function clone (value) { - let mapped = visited.get(value); - if (mapped === undefined) { - mapped = value === grammar ? result : {}; - visited.set(value, mapped); - - // tokens - for (const [key, tokens] of Object.entries(value)) { - mapped[key] = cloneTokens(/** @type {GrammarToken[]} */ (tokens)); - } - - // rest - const r = value.$rest; - if (r != null) { - mapped.$rest = cloneRef(r); - } - - // tokenize - const t = value.$tokenize; - if (t) { - mapped.$tokenize = t; - } - } - return mapped; - } - - return clone(grammar); + return lang; } /** * @typedef {import('../types.d.ts').Grammar} Grammar - * @typedef {import('../types.d.ts').GrammarToken} GrammarToken - * @typedef {import('../types.d.ts').GrammarTokens} GrammarTokens - * @typedef {import('../types.d.ts').RegExpLike} RegExpLike */ diff --git a/src/util/grammar-patch.js b/src/util/grammar-patch.js new file mode 100644 index 0000000000..bbf1f7081f --- /dev/null +++ b/src/util/grammar-patch.js @@ -0,0 +1,93 @@ +import { insertAfter, insertBefore } from './insert.js'; +import { deepMerge } from './objects.js'; + +/** + * Apply a patch to a grammar to modify it. + * The patch and the grammar may be the same object. + * + * @param {Grammar} grammar + * @param {Grammar} [patch=grammar] + * @returns {Grammar} + */ +export function grammarPatch (grammar, patch = grammar) { + if (patch.$insertBefore) { + for (const key in patch.$insertBefore) { + const tokens = patch.$insertBefore[key]; + + if (key?.includes('/')) { + // Deep key + let path = key.split('/'); + const lastKey = path.pop(); + path = path.flatMap(key => [key, 'inside']); // add `inside` after each key + // @ts-ignore + const obj = path.reduce((acc, key) => acc?.[key], grammar); + + if (obj) { + // @ts-ignore + insertBefore(obj, lastKey, tokens); + } + } + else if (tokens) { + // @ts-ignore + insertBefore(grammar, key, tokens); + } + } + delete grammar.$insertBefore; + } + + if (patch.$insertAfter) { + for (const key in patch.$insertAfter) { + const tokens = patch.$insertAfter[key]; + + if (key?.includes('/')) { + // Deep key + let path = key.split('/'); + const lastKey = path.pop(); + path = path.flatMap(key => [key, 'inside']); // add `inside` after each key + // @ts-ignore + const obj = path.reduce((acc, key) => acc?.[key], grammar); + + if (obj) { + // @ts-ignore + insertAfter(obj, lastKey, tokens); + } + } + else if (tokens) { + // @ts-ignore + insertAfter(grammar, key, tokens); + } + } + delete grammar.$insertAfter; + } + + if (patch.$delete) { + // @ts-ignore + for (const key of patch.$delete) { + // TODO support deep keys + delete grammar[key]; + } + delete grammar.$delete; + } + + if (patch.$merge) { + for (const key in patch.$merge) { + const tokens = patch.$merge[key]; + + if (grammar[key]) { + deepMerge(grammar[key], tokens); + } + else { + grammar[key] = /** @type {GrammarTokens} */ (tokens); + } + } + + delete grammar.$merge; + } + + return grammar; +} + +/** + * @typedef {import('../types.d.ts').Grammar} Grammar + * @typedef {import('../types.d.ts').GrammarTokens} GrammarTokens + */ diff --git a/src/util/insert.js b/src/util/insert.js index 229d0f656a..1764c4bf75 100644 --- a/src/util/insert.js +++ b/src/util/insert.js @@ -1,3 +1,5 @@ +import { betterAssign } from './objects.js'; + /** * Inserts tokens _before_ another token in the given grammar. * @@ -26,48 +28,59 @@ * }); * ``` * - * ## Special cases - * - * If the grammars of `grammar` and `insert` have tokens with the same name, the tokens in `grammar`'s grammar - * will be ignored. - * - * This behavior can be used to insert tokens after `before`: + * @param {Grammar} grammar The grammar to be modified. + * @param {string} beforeKey The key to insert before. + * @param {GrammarTokens} tokens An object containing the key-value pairs to be inserted. + */ +export function insertBefore (grammar, beforeKey, tokens) { + insert(grammar, beforeKey, tokens, 'before'); +} + +/** * - * ```js - * insertBefore(markup, 'comment', { - * 'comment': markup.comment, - * // tokens after 'comment' - * }); - * ``` + * @param {Grammar} grammar + * @param {string} afterKey + * @param {GrammarTokens} tokens + */ +export function insertAfter (grammar, afterKey, tokens) { + insert(grammar, afterKey, tokens); +} + +/** * - * @param {Grammar} grammar The grammar to be modified. - * @param {string} before The key to insert before. - * @param {GrammarTokens} insert An object containing the key-value pairs to be inserted. - * @returns {void} + * @param {Grammar} grammar + * @param {string} atKey + * @param {GrammarTokens} insert + * @param {'before' | 'after'} [position='after'] */ -export function insertBefore (grammar, before, insert) { - if (!(before in grammar)) { - throw new Error(`"${before}" has to be a key of grammar.`); +export function insert (grammar, atKey, insert, position = 'after') { + if (!(atKey in grammar)) { + // TODO support deep keys + throw new Error(`"${atKey}" has to be a key of grammar.`); } - const grammarEntries = Object.entries(grammar); + const descriptors = Object.getOwnPropertyDescriptors(grammar); // delete all keys in `grammar` - for (const [key] of grammarEntries) { - delete grammar[key]; + for (const key in descriptors) { + if (Object.hasOwn(descriptors, key)) { + delete grammar[key]; + } } // insert keys again - for (const [key, value] of grammarEntries) { - if (key === before) { - for (const insertKey of Object.keys(insert)) { - grammar[insertKey] = insert[insertKey]; - } + for (const key in descriptors) { + if (position === 'before' && key === atKey) { + betterAssign(grammar, insert); } // Do not insert tokens which also occur in `insert`. See #1525 - if (!insert.hasOwnProperty(key)) { - grammar[key] = /** @type {GrammarToken} */ (value); + if (!Object.hasOwn(insert, key)) { + Object.defineProperty(grammar, key, descriptors[key]); + } + + if (position === 'after' && key === atKey) { + betterAssign(grammar, insert); } } } diff --git a/src/util/objects.js b/src/util/objects.js index 76bde2ee38..5cef7fe628 100644 --- a/src/util/objects.js +++ b/src/util/objects.js @@ -1,3 +1,5 @@ +import { toArray } from './iterables.js'; + /** * @template {Record} T * @template {keyof T} K @@ -43,3 +45,140 @@ export function defineSimpleProperty (obj, key, value) { configurable: false, }); } + +/** + * + * @param {any} obj + * @param {string} type + * @returns {boolean} + */ +export function isObject (obj, type) { + if (!obj || typeof obj !== 'object') { + return false; + } + + const proto = Object.getPrototypeOf(obj); + return proto.constructor?.name === type; +} + +/** + * @param {any} obj + * @returns {boolean} + */ +export function isPlainObject (obj) { + return isObject(obj, 'Object'); +} + +/** + * @typedef {object} MergeOptions + * @property {any[]} [emptyValues] + * @property {string[]} [containers] + * @property {(value: any, key?: Property, parent?: any) => boolean} [isContainer] + * @property {boolean} [mergeArrays] + */ + +/** @typedef {string | number | symbol} Property */ + +/** + * + * @param {any} target + * @param {any} source + * @param {MergeOptions} [options={}] + * @returns + */ +export function deepMerge (target, source, options = {}) { + const { + emptyValues = [undefined], + containers = ['Object', 'EventTarget'], + isContainer = value => containers.some(type => isObject(value, type)), + mergeArrays = false, + } = options; + + if (mergeArrays && (Array.isArray(target) || Array.isArray(source))) { + target = toArray(target); + source = toArray(source); + return target.concat(source); + } + + if (isContainer(target) && isContainer(source)) { + for (const key in source) { + target[key] = deepMerge(target[key], source[key], options); + } + + return target; + } + + if (emptyValues.includes(target)) { + return source; + } + + return source ?? target; +} + +/** + * @typedef {object} CloneOptions + * + * Used internally to store clones of objects, + * both for performance but mainly to avoid getting tripped up in circular references + * @property {WeakMap} [_clones] + */ + +/** + * @param {any} obj + * @param {CloneOptions} options + */ +export function deepClone (obj, options = {}) { + if (!obj || typeof obj !== 'object') { + return obj; + } + + options._clones ??= new WeakMap(); + const { _clones } = options; + + if (_clones.has(obj)) { + return _clones.get(obj); + } + + let ret = obj; + + if (Array.isArray(obj)) { + ret = []; + _clones.set(obj, ret); + + for (const item of obj) { + ret.push(deepClone(item, options)); + } + } + else if (isPlainObject(obj)) { + ret = { ...obj }; + _clones.set(obj, ret); + + for (const key in obj) { + ret[key] = deepClone(obj[key], options); + } + } + + return ret; +} + +/** + * Like Object.assign() but preserves accessors. + * + * @param {Record} target + * @param {Record[]} sources + */ +export function betterAssign (target, ...sources) { + for (const source of sources) { + const descriptors = Object.getOwnPropertyDescriptors(source); + for (const key in descriptors) { + if (Object.hasOwn(target, key)) { + continue; + } + + const descriptor = descriptors[key]; + Object.defineProperty(target, key, descriptor); + } + } + + return target; +} diff --git a/tests/core/registry.js b/tests/core/registry.js index 8faee66356..f7d2e76451 100644 --- a/tests/core/registry.js +++ b/tests/core/registry.js @@ -5,7 +5,7 @@ describe('Registry', () => { it('should resolve aliases', () => { const { components } = new Prism(); - const grammar = {}; + const grammar = /** @type {Grammar} */ ({}); components.add({ id: 'a', alias: 'b', grammar }); assert.isTrue(components.has('a')); @@ -21,7 +21,7 @@ describe('Registry', () => { it('should resolve aliases in optional dependencies', () => { const { components } = new Prism(); - const grammar = {}; + const grammar = /** @type {Grammar} */ ({}); components.add({ id: 'a', alias: 'b', grammar }); components.add({ id: 'c', @@ -62,4 +62,7 @@ describe('Registry', () => { }); }); -/** @typedef {import('../../src/types.d.ts').GrammarOptions} GrammarOptions */ +/** + * @typedef {import('../../src/types.d.ts').GrammarOptions} GrammarOptions + * @typedef {import('../../src/types.d.ts').Grammar} Grammar + */ diff --git a/tests/tsconfig.json b/tests/tsconfig.json index bc185a1ebf..485c7ec043 100644 --- a/tests/tsconfig.json +++ b/tests/tsconfig.json @@ -12,7 +12,7 @@ // "disableReferencedProjectLoad": true, /* Reduce the number of projects loaded automatically by TypeScript. */ /* Language and Environment */ - "target": "ES2020" /* Set the JavaScript language version for emitted JavaScript and include compatible library declarations. */, + "target": "ES2022" /* Set the JavaScript language version for emitted JavaScript and include compatible library declarations. */, // "lib": [], /* Specify a set of bundled library declaration files that describe the target runtime environment. */ // "jsx": "preserve", /* Specify what JSX code is generated. */ // "experimentalDecorators": true, /* Enable experimental support for TC39 stage 2 draft decorators. */ diff --git a/tsconfig.json b/tsconfig.json index 238dafeae5..2b52d61200 100644 --- a/tsconfig.json +++ b/tsconfig.json @@ -11,7 +11,7 @@ // "disableReferencedProjectLoad": true, /* Reduce the number of projects loaded automatically by TypeScript. */ /* Language and Environment */ - "target": "es2020" /* Set the JavaScript language version for emitted JavaScript and include compatible library declarations. */, + "target": "ES2022" /* Set the JavaScript language version for emitted JavaScript and include compatible library declarations. */, // "lib": [], /* Specify a set of bundled library declaration files that describe the target runtime environment. */ // "jsx": "preserve", /* Specify what JSX code is generated. */ // "experimentalDecorators": true, /* Enable experimental support for TC39 stage 2 draft decorators. */