414 lines
14 KiB
TypeScript
414 lines
14 KiB
TypeScript
import {Fragment} from "./fragment"
|
||
import {NodeType} from "./schema"
|
||
|
||
// An outgoing edge of a match state: `type` is the node type that may
// come next, and `next` is the match state reached after a node of
// that type.
type MatchEdge = {type: NodeType, next: ContentMatch}
|
||
|
||
/// Instances of this class represent a match state of a node type's
/// [content expression](#model.NodeSpec.content), and can be used to
/// find out whether further content matches here, and whether a given
/// position is a valid end of the node.
export class ContentMatch {
  /// @internal
  // Outgoing edges of this state. Their order matters: `defaultType`
  // and `fillBefore` pick the first usable edge.
  readonly next: MatchEdge[] = []
  /// @internal
  // Flat cache for `findWrapping`: a target type at each even index,
  // followed by the wrapping computed for it at the next odd index.
  readonly wrapCache: (NodeType | readonly NodeType[] | null)[] = []

  /// @internal
  constructor(
    /// True when this match state represents a valid end of the node.
    readonly validEnd: boolean
  ) {}

  /// @internal
  static parse(string: string, nodeTypes: {readonly [name: string]: NodeType}): ContentMatch {
    let stream = new TokenStream(string, nodeTypes)
    // An expression without any tokens yields the shared empty match.
    if (stream.next == null) return ContentMatch.empty
    let expr = parseExpr(stream)
    if (stream.next) stream.err("Unexpected trailing text")
    let match = dfa(nfa(expr))
    checkForDeadEnds(match, stream)
    return match
  }

  /// Match a node type, returning a match after that node if
  /// successful.
  matchType(type: NodeType): ContentMatch | null {
    for (let i = 0; i < this.next.length; i++)
      if (this.next[i].type == type) return this.next[i].next
    return null
  }

  /// Try to match a fragment. Returns the resulting match when
  /// successful.
  matchFragment(frag: Fragment, start = 0, end = frag.childCount): ContentMatch | null {
    let cur: ContentMatch | null = this
    // Stops early (with `cur` set to null) as soon as a child fails to match.
    for (let i = start; cur && i < end; i++)
      cur = cur.matchType(frag.child(i).type)
    return cur
  }

  /// @internal
  get inlineContent() {
    return this.next.length != 0 && this.next[0].type.isInline
  }

  /// Get the first matching node type at this match position that can
  /// be generated.
  get defaultType(): NodeType | null {
    for (let i = 0; i < this.next.length; i++) {
      let {type} = this.next[i]
      // Text nodes and nodes with required attributes are skipped.
      if (!(type.isText || type.hasRequiredAttrs())) return type
    }
    return null
  }

  /// @internal
  // True when this state and `other` have at least one outgoing node
  // type in common.
  compatible(other: ContentMatch) {
    for (let i = 0; i < this.next.length; i++)
      for (let j = 0; j < other.next.length; j++)
        if (this.next[i].type == other.next[j].type) return true
    return false
  }

  /// Try to match the given fragment, and if that fails, see if it can
  /// be made to match by inserting nodes in front of it. When
  /// successful, return a fragment of inserted nodes (which may be
  /// empty if nothing had to be inserted). When `toEnd` is true, only
  /// return a fragment if the resulting match goes to the end of the
  /// content expression.
  fillBefore(after: Fragment, toEnd = false, startIndex = 0): Fragment | null {
    // States that were already explored (or queued), so that cycles in
    // the automaton do not cause endless recursion.
    let seen: ContentMatch[] = [this]
    // Depth-first search. `types` holds the node types inserted so far
    // to get from `this` to `match`.
    function search(match: ContentMatch, types: readonly NodeType[]): Fragment | null {
      let finished = match.matchFragment(after, startIndex)
      if (finished && (!toEnd || finished.validEnd))
        return Fragment.from(types.map(tp => tp.createAndFill()!))

      for (let i = 0; i < match.next.length; i++) {
        let {type, next} = match.next[i]
        // Only follow edges for non-text types without required attributes.
        if (!(type.isText || type.hasRequiredAttrs()) && seen.indexOf(next) == -1) {
          seen.push(next)
          let found = search(next, types.concat(type))
          if (found) return found
        }
      }
      return null
    }

    return search(this, [])
  }

  /// Find a set of wrapping node types that would allow a node of the
  /// given type to appear at this position. The result may be empty
  /// (when it fits directly) and will be null when no such wrapping
  /// exists.
  findWrapping(target: NodeType): readonly NodeType[] | null {
    for (let i = 0; i < this.wrapCache.length; i += 2)
      if (this.wrapCache[i] == target) return this.wrapCache[i + 1] as (readonly NodeType[] | null)
    let computed = this.computeWrapping(target)
    this.wrapCache.push(target, computed)
    return computed
  }

  /// @internal
  // Uncached worker for `findWrapping`. Processes candidates in
  // first-in-first-out order; each candidate remembers the wrapper
  // type that led to it (`type`) and the candidate it came from (`via`).
  computeWrapping(target: NodeType): readonly NodeType[] | null {
    type Active = {match: ContentMatch, type: NodeType | null, via: Active | null}
    let seen = Object.create(null), active: Active[] = [{match: this, type: null, via: null}]
    while (active.length) {
      let current = active.shift()!, match = current.match
      if (match.matchType(target)) {
        // Walk the `via` chain back to the start and reverse it, so the
        // types are listed in the order they were entered.
        let result: NodeType[] = []
        for (let obj: Active = current; obj.type; obj = obj.via!)
          result.push(obj.type)
        return result.reverse()
      }
      for (let i = 0; i < match.next.length; i++) {
        let {type, next} = match.next[i]
        // Candidate wrappers are non-leaf types without required
        // attributes that were not queued before. Once inside a wrapper
        // (`current.type` set), the state after the candidate must also
        // be a valid end.
        if (!type.isLeaf && !type.hasRequiredAttrs() && !(type.name in seen) && (!current.type || next.validEnd)) {
          active.push({match: type.contentMatch, type, via: current})
          seen[type.name] = true
        }
      }
    }
    return null
  }

  /// The number of outgoing edges this node has in the finite
  /// automaton that describes the content expression.
  get edgeCount() {
    return this.next.length
  }

  /// Get the _n_th outgoing edge from this node in the finite
  /// automaton that describes the content expression. Throws a
  /// `RangeError` when there is no edge at index `n`.
  edge(n: number): MatchEdge {
    // Looking the edge up first also rejects negative, fractional and
    // NaN indices, which a plain `n >= length` check lets through.
    let found = this.next[n]
    if (!found) throw new RangeError(`There's no ${n}th edge in this content match`)
    return found
  }

  /// @internal
  // One line per reachable state: its index, `*` when it is a valid
  // end, and its edges as `typeName->targetIndex`.
  toString() {
    let seen: ContentMatch[] = []
    function scan(m: ContentMatch) {
      seen.push(m)
      for (let i = 0; i < m.next.length; i++)
        if (seen.indexOf(m.next[i].next) == -1) scan(m.next[i].next)
    }
    scan(this)
    return seen.map((m, i) => {
      let out = i + (m.validEnd ? "*" : " ") + " "
      for (let j = 0; j < m.next.length; j++)
        out += (j ? ", " : "") + m.next[j].type.name + "->" + seen.indexOf(m.next[j].next)
      return out
    }).join("\n")
  }

  /// @internal
  static empty = new ContentMatch(true)
}
|
||
|
||
// Tokenizer for content expressions. The constructor splits the whole
// input into tokens; the parser then walks them through `next`, `eat`
// and direct updates of `pos`.
class TokenStream {
  // Set by the parser from the first node type it resolves (`null`
  // until then), so that later types can be checked against it.
  inline: boolean | null = null
  // Index of the current token in `tokens`.
  pos = 0
  tokens: string[]

  constructor(
    readonly string: string,
    readonly nodeTypes: {readonly [name: string]: NodeType}
  ) {
    this.tokens = string.split(/\s*(?=\b|\W|$)/)
    // Leading or trailing whitespace leaves an empty token at that end.
    if (this.tokens[this.tokens.length - 1] == "") this.tokens.pop()
    if (this.tokens[0] == "") this.tokens.shift()
  }

  // The current token, or undefined when the input is exhausted.
  get next() { return this.tokens[this.pos] }

  // Consume the current token when it equals `tok`. Always returns a
  // real boolean telling whether a token was consumed.
  eat(tok: string): boolean {
    if (this.next != tok) return false
    this.pos++
    return true
  }

  // Throw a SyntaxError that mentions the full expression being parsed.
  err(str: string): never { throw new SyntaxError(str + " (in content expression '" + this.string + "')") }
}
|
||
|
||
// Syntax tree produced by the content expression parser.
type Expr =
  | {type: "choice", exprs: Expr[]}
  | {type: "seq", exprs: Expr[]}
  | {type: "plus", expr: Expr}
  | {type: "star", expr: Expr}
  | {type: "opt", expr: Expr}
  // `max` is -1 when the range has no upper bound (`{min,}`).
  | {type: "range", min: number, max: number, expr: Expr}
  | {type: "name", value: NodeType}
|
||
|
||
// Parse one or more sequences separated by `|`. A lone sequence is
// returned as-is; several are wrapped in a "choice" node.
function parseExpr(stream: TokenStream): Expr {
  const alternatives: Expr[] = [parseExprSeq(stream)]
  while (stream.eat("|")) alternatives.push(parseExprSeq(stream))
  if (alternatives.length == 1) return alternatives[0]
  return {type: "choice", exprs: alternatives}
}
|
||
|
||
// Parse consecutive subscripted atoms until the input ends or a `)`
// or `|` token comes up. A lone element is returned as-is; several
// are wrapped in a "seq" node.
function parseExprSeq(stream: TokenStream): Expr {
  const parts: Expr[] = []
  for (;;) {
    parts.push(parseExprSubscript(stream))
    const upcoming = stream.next
    if (!upcoming || upcoming == ")" || upcoming == "|") break
  }
  if (parts.length == 1) return parts[0]
  return {type: "seq", exprs: parts}
}
|
||
|
||
// Parse an atom followed by any number of postfix operators (`+`,
// `*`, `?` or a `{...}` range), each one wrapping the expression
// parsed so far.
function parseExprSubscript(stream: TokenStream): Expr {
  let result = parseExprAtom(stream)
  while (true) {
    if (stream.eat("+")) { result = {type: "plus", expr: result}; continue }
    if (stream.eat("*")) { result = {type: "star", expr: result}; continue }
    if (stream.eat("?")) { result = {type: "opt", expr: result}; continue }
    if (stream.eat("{")) { result = parseExprRange(stream, result); continue }
    return result
  }
}
|
||
|
||
// Read the current token as a number and move past it. Reports a
// syntax error when the token contains a non-digit character.
function parseNum(stream: TokenStream) {
  const token = stream.next
  if (/\D/.test(token)) stream.err("Expected number, got '" + token + "'")
  const value = Number(token)
  stream.pos++
  return value
}
|
||
|
||
// Parse the inside of a braced range, the opening `{` having been
// consumed already. `{n}` gives min = max = n, `{n,}` gives max = -1,
// and `{n,m}` gives max = m.
function parseExprRange(stream: TokenStream, expr: Expr): Expr {
  const min = parseNum(stream)
  let max: number
  if (!stream.eat(",")) max = min
  else if (stream.next == "}") max = -1
  else max = parseNum(stream)
  if (!stream.eat("}")) stream.err("Unclosed braced range")
  return {type: "range", min, max, expr}
}
|
||
|
||
// Turn a name from a content expression into node types. A name that
// is a key of `stream.nodeTypes` resolves to just that type;
// otherwise every type that reports being in a group of that name is
// collected. Reports a syntax error when nothing is found.
function resolveName(stream: TokenStream, name: string): readonly NodeType[] {
  const known = stream.nodeTypes
  const exact = known[name]
  if (exact) return [exact]
  const members: NodeType[] = []
  for (const key in known) {
    const candidate = known[key]
    if (candidate.isInGroup(name)) members.push(candidate)
  }
  if (members.length == 0) stream.err("No node type or group '" + name + "' found")
  return members
}
|
||
|
||
// Parse a parenthesized sub-expression or a node/group name. A name
// that resolves to several node types becomes a "choice" between
// them. Also records on `stream.inline` whether the expression holds
// inline or block types, and reports an error when the two are mixed.
function parseExprAtom(stream: TokenStream): Expr {
  if (stream.eat("(")) {
    let expr = parseExpr(stream)
    if (!stream.eat(")")) stream.err("Missing closing paren")
    return expr
  }
  let name = stream.next
  // At the end of the input there is no token. Without this check the
  // missing token would pass the name test below (it stringifies to
  // "undefined") and be looked up as a node or group name.
  if (name == null) return stream.err("Unexpected end of expression")
  if (/\W/.test(name)) return stream.err("Unexpected token '" + name + "'")
  let exprs = resolveName(stream, name).map(type => {
    if (stream.inline == null) stream.inline = type.isInline
    else if (stream.inline != type.isInline) stream.err("Mixing inline and block content")
    return {type: "name", value: type} as Expr
  })
  stream.pos++
  return exprs.length == 1 ? exprs[0] : {type: "choice", exprs}
}
|
||
|
||
// The code below helps compile a regular-expression-like language
|
||
// into a deterministic finite automaton. For a good introduction to
|
||
// these concepts, see https://swtch.com/~rsc/regexp/regexp1.html
|
||
|
||
// An edge in the NFA. `term` is the node type the edge matches and is
// undefined for null edges (ones that match no node). `to` is the
// index of the target state; it may be undefined when the edge is
// created and is then filled in later.
type Edge = {term: NodeType | undefined, to: number | undefined}
|
||
|
||
// Build an NFA from an expression tree as returned by the parser. The
// NFA is an array of states, each of which is an array of
// `{term, to}` edge objects. State 0 is the entry state and the state
// that was created last is the success state.
//
// Note that unlike typical NFAs, the edge ordering in this one is
// significant, in that it is used to construct filler content when
// necessary.
function nfa(expr: Expr): Edge[][] {
  const states: Edge[][] = [[]]

  // Append a new state without edges and return its index.
  function node(): number {
    states.push([])
    return states.length - 1
  }

  // Add an edge leaving `from`. `to` may be left out and filled in
  // later by `connect`; `term` is left out for null edges.
  function edge(from: number, to?: number, term?: NodeType) {
    const created = {term, to}
    states[from].push(created)
    return created
  }

  // Point every given edge at state `to`.
  function connect(edges: Edge[], to: number) {
    for (const pending of edges) pending.to = to
  }

  // Add the states and edges for `expr`, starting at state `from`.
  // Returns the edges that leave the compiled sub-automaton and still
  // need a target.
  function compile(expr: Expr, from: number): Edge[] {
    switch (expr.type) {
      case "choice": {
        // Every alternative starts at the same state.
        let pending: Edge[] = []
        for (const option of expr.exprs) pending = pending.concat(compile(option, from))
        return pending
      }
      case "seq": {
        // Chain the elements, adding a new state between neighbours.
        const last = expr.exprs.length - 1
        for (let i = 0; i < last; i++) {
          const pending = compile(expr.exprs[i], from)
          from = node()
          connect(pending, from)
        }
        return compile(expr.exprs[last], from)
      }
      case "star": {
        const loop = node()
        edge(from, loop)
        connect(compile(expr.expr, loop), loop)
        return [edge(loop)]
      }
      case "plus": {
        // The inner expression is compiled twice: once to get into the
        // loop state, once for the loop itself.
        const loop = node()
        connect(compile(expr.expr, from), loop)
        connect(compile(expr.expr, loop), loop)
        return [edge(loop)]
      }
      case "opt": {
        // The skipping null edge is added before the edges of the inner
        // expression, so it comes first in the state's edge list.
        const skip = edge(from)
        return [skip].concat(compile(expr.expr, from))
      }
      case "range": {
        let cur = from
        // Required repetitions.
        for (let i = 0; i < expr.min; i++) {
          const next = node()
          connect(compile(expr.expr, cur), next)
          cur = next
        }
        if (expr.max == -1) {
          // No upper bound: loop on the current state.
          connect(compile(expr.expr, cur), cur)
        } else {
          // Optional repetitions, each of which can be skipped through
          // a null edge.
          for (let i = expr.min; i < expr.max; i++) {
            const next = node()
            edge(cur, next)
            connect(compile(expr.expr, cur), next)
            cur = next
          }
        }
        return [edge(cur)]
      }
      case "name":
        return [edge(from, undefined, expr.value)]
      default:
        throw new Error("Unknown expr type")
    }
  }

  // The success state has to be created after everything else, so
  // that it ends up last in the array.
  const pendingExit = compile(expr, 0)
  connect(pendingExit, node())
  return states
}
|
||
|
||
// Comparator that orders numbers from high to low when passed to
// `Array.prototype.sort`.
function cmp(a: number, b: number): number {
  return b - a
}
|
||
|
||
// Collect the states that can be reached from `node` through null
// edges alone, sorted with `cmp`. States whose only out-edge is a
// null edge are passed through without being recorded, since they may
// lead to needless duplicated nodes.
function nullFrom(nfa: Edge[][], node: number): readonly number[] {
  const reached: number[] = []

  const visit = (state: number): void => {
    const out = nfa[state]
    // A single null out-edge: continue at its target instead.
    if (out.length == 1 && !out[0].term) {
      visit(out[0].to!)
      return
    }
    reached.push(state)
    for (const {term, to} of out) {
      if (!term && reached.indexOf(to!) == -1) visit(to!)
    }
  }

  visit(node)
  return reached.sort(cmp)
}
|
||
|
||
// Turn an NFA as produced by `nfa` into a DFA, modeled as a set of
// state objects (`ContentMatch` instances) with transitions between
// them. Returns the DFA's start state.
function dfa(nfa: Edge[][]): ContentMatch {
  // DFA states created so far, keyed by the comma-joined list of NFA
  // states they stand for.
  const labeled = Object.create(null)

  // Create the DFA state for the given set of NFA states, along with
  // every state reachable from it that does not exist yet.
  function explore(states: readonly number[]) {
    // For each node type that can be matched from this set, in order
    // of first appearance, the NFA states reached by matching it.
    const out: [NodeType, number[]][] = []
    for (const source of states) {
      for (const {term, to} of nfa[source]) {
        if (!term) continue
        let targets: number[] | undefined
        for (const entry of out) if (entry[0] == term) targets = entry[1]
        for (const reached of nullFrom(nfa, to!)) {
          if (!targets) out.push([term, targets = []])
          if (targets.indexOf(reached) == -1) targets.push(reached)
        }
      }
    }
    // A set is a valid end when it holds the last NFA state.
    const accepting = states.indexOf(nfa.length - 1) > -1
    // Registered before the edges are filled in, so that cycles find
    // this state instead of exploring it again.
    const state = labeled[states.join(",")] = new ContentMatch(accepting)
    for (const entry of out) {
      const targetStates = entry[1].sort(cmp)
      const known = labeled[targetStates.join(",")]
      state.next.push({type: entry[0], next: known || explore(targetStates)})
    }
    return state
  }

  return explore(nullFrom(nfa, 0))
}
|
||
|
||
// Walk every state reachable from `match` and report a syntax error
// for any state that is not a valid end and whose outgoing node types
// are all text or have required attributes.
function checkForDeadEnds(match: ContentMatch, stream: TokenStream) {
  // Work list that grows while it is being iterated.
  const queue = [match]
  for (let at = 0; at < queue.length; at++) {
    const state = queue[at]
    const names: string[] = []
    let dead = !state.validEnd
    for (const {type, next} of state.next) {
      names.push(type.name)
      if (dead && !(type.isText || type.hasRequiredAttrs())) dead = false
      if (queue.indexOf(next) == -1) queue.push(next)
    }
    if (dead) stream.err("Only non-generatable nodes (" + names.join(", ") + ") in a required position (see https://prosemirror.net/docs/guide/#generatable)")
  }
}
|