|
| 1 | +import { visit } from 'unist-util-visit' |
| 2 | +import type { Root, Text, Element } from 'hast' |
| 3 | + |
/** Settings accepted by `rehypeKunInsertWbr`; every field is optional. */
interface Options {
  /**
   * Minimum length (in UTF-16 code units) a text node must have to be
   * processed at all, and that a single whitespace-delimited token must have
   * before break opportunities are placed around `splitChars`.
   * Defaults to 30.
   */
  minLength?: number
  /**
   * Longest run of characters left without a break opportunity; longer runs
   * are cut into pieces of this size separated by `<wbr>`.
   * Defaults to 20.
   */
  chunkSize?: number
  /**
   * Separators that get a `<wbr>` on both sides inside long tokens.
   * Defaults to `_`, `-`, `.` and `/`.
   */
  splitChars?: string[]
  /**
   * Tag names whose direct text children are left untouched.
   * Defaults to `code`, `pre`, `style`, `script`, `textarea` and `input`.
   */
  skipTags?: string[]
}
| 10 | + |
/**
 * Builds a new hast element node representing an empty `<wbr>` tag.
 *
 * A fresh object is created on every call so each inserted node can be
 * mutated independently by later tree transforms.
 */
const makeWbrNode = (): Element => ({
  type: 'element',
  tagName: 'wbr',
  properties: {},
  children: []
})
| 14 | + |
/**
 * Appends `str` to `out` as consecutive text nodes of at most `size` UTF-16
 * code units, with a `<wbr>` element between neighbouring pieces.
 *
 * @param str - Text to cut up; an empty string appends nothing.
 * @param size - Maximum piece length. Fractions are floored. When the result
 *   is not at least 1 (zero, negative, NaN) the text is appended as a single
 *   node instead of being chunked.
 * @param out - Array that receives the generated nodes; mutated in place.
 */
const chunkAndPush = (
  str: string,
  size: number,
  out: Array<Text | Element>
) => {
  const step = Math.floor(size)

  // A step below 1 would never advance the cursor, so do not chunk at all.
  if (!(step >= 1)) {
    if (str) out.push({ type: 'text', value: str })
    return
  }

  const isHighSurrogate = (code: number) => code >= 0xd800 && code <= 0xdbff
  const isLowSurrogate = (code: number) => code >= 0xdc00 && code <= 0xdfff

  let start = 0
  while (start < str.length) {
    let end = Math.min(start + step, str.length)

    // Never cut between the two halves of a surrogate pair: keep the pair
    // together by letting this piece run one code unit longer.
    if (
      end < str.length &&
      isHighSurrogate(str.charCodeAt(end - 1)) &&
      isLowSurrogate(str.charCodeAt(end))
    ) {
      end += 1
    }

    out.push({ type: 'text', value: str.slice(start, end) })
    if (end < str.length) out.push(makeWbrNode())
    start = end
  }
}
| 25 | + |
/**
 * Escapes `s` so it matches literally when interpolated into a regular
 * expression source, both at the top level and inside a character class.
 *
 * Besides the usual syntax characters, `-` is rewritten to `\x2d`: left bare
 * it would act as a range operator inside `[...]` (e.g. `[_-.]` is an
 * out-of-order range and throws). The hex form is valid in every position,
 * with or without the `u` flag.
 *
 * @param s - Raw text to be matched literally.
 * @returns Pattern source that matches exactly `s`.
 */
const escapeRegExp = (s: string): string => {
  return s.replace(/[.*+?^${}()|[\]\\]/g, '\\$&').replace(/-/g, '\\x2d')
}
| 29 | + |
/**
 * Rehype plugin that adds `<wbr>` break opportunities to long unbroken text so
 * it can wrap instead of overflowing its container.
 *
 * Every text node of at least `minLength` characters whose direct parent is
 * not listed in `skipTags` is split on whitespace. Inside each non-whitespace
 * token:
 * - if the token is at least `minLength` long, a `<wbr>` is placed on both
 *   sides of every `splitChars` separator;
 * - any run longer than `chunkSize` is cut into `chunkSize`-sized pieces
 *   joined by `<wbr>`.
 *
 * Text nodes that end up needing no `<wbr>` are left untouched.
 *
 * @param options - Thresholds, separators and excluded tags; all optional.
 * @returns A transformer that mutates the given hast tree in place.
 */
export const rehypeKunInsertWbr = (options: Options = {}) => {
  const {
    minLength = 30,
    chunkSize = 20,
    splitChars = ['_', '-', '.', '/'],
    skipTags = ['code', 'pre', 'style', 'script', 'textarea', 'input']
  } = options

  // Drop empty/duplicate entries; longest first so a multi-character
  // separator wins over a shorter one that is its prefix.
  const separators = [...new Set(splitChars)]
    .filter((sep) => sep.length > 0)
    .sort((a, b) => b.length - a.length)

  // Built as an alternation rather than a character class, so a `-` among the
  // separators can never be read as a range operator. No `g` flag: `split`
  // finds every match regardless, and the regex stays free of `lastIndex`
  // state between tokens.
  const splitRegex =
    separators.length > 0
      ? new RegExp(`(${separators.map((sep) => escapeRegExp(sep)).join('|')})`)
      : null

  // Chunking needs a step of at least one character to make progress.
  const pieceSize = Math.floor(chunkSize)
  const canChunk = pieceSize >= 1

  /** Appends text, extending the previous text node when there is one. */
  const pushText = (out: Array<Text | Element>, value: string) => {
    if (!value) return
    const last = out[out.length - 1]
    if (last && last.type === 'text') {
      last.value += value
    } else {
      out.push({ type: 'text', value })
    }
  }

  /** Appends a `<wbr>` unless the previous node already is one. */
  const pushWbr = (out: Array<Text | Element>) => {
    const last = out[out.length - 1]
    if (last && last.type === 'element' && last.tagName === 'wbr') return
    out.push(makeWbrNode())
  }

  /** Appends a separator-free run, chunking it when it exceeds `chunkSize`. */
  const pushRun = (out: Array<Text | Element>, run: string) => {
    if (canChunk && run.length > pieceSize) {
      chunkAndPush(run, pieceSize, out)
    } else {
      pushText(out, run)
    }
  }

  return (tree: Root) => {
    visit(tree, 'text', (node: Text, index, parent) => {
      if (!parent) return undefined

      // Only the direct parent is checked; text nested deeper inside a
      // skipped tag (e.g. a span within `code`) is still processed.
      const parentTag = (parent as Element).tagName
      if (parentTag && skipTags.includes(parentTag)) return undefined

      const text = node.value
      if (!text || text.length < minLength) return undefined

      const newNodes: Array<Text | Element> = []

      // The capture group keeps the whitespace runs as tokens of their own.
      for (const token of text.split(/(\s+)/)) {
        if (!token) continue

        if (/^\s+$/.test(token)) {
          pushText(newNodes, token)
          continue
        }

        // Splitting on a single capture group yields
        // [run, separator, run, separator, ...]: odd indices are separators.
        const parts =
          splitRegex && token.length >= minLength
            ? token.split(splitRegex)
            : [token]

        parts.forEach((part, position) => {
          if (!part) return
          if (position % 2 === 1) {
            pushWbr(newNodes)
            pushText(newNodes, part)
            pushWbr(newNodes)
          } else {
            pushRun(newNodes, part)
          }
        })
      }

      // Without any `<wbr>` the text is unchanged, so keep the original node.
      if (!newNodes.some((child) => child.type === 'element')) return undefined

      const siblings = (parent as Element).children
      if (!Array.isArray(siblings)) return undefined

      const at =
        typeof index === 'number' && siblings[index] === node
          ? index
          : siblings.indexOf(node)
      if (at === -1) return undefined

      siblings.splice(at, 1, ...newNodes)

      // Resume after the nodes just inserted so they are not visited again.
      return at + newNodes.length
    })
  }
}
0 commit comments