/// Replaces emoji-like and symbol characters in `text` with numeric HTML
/// character references of the form `&#NNNN;` (decimal code point).
///
/// Characters that are encoded:
///   - well-formed surrogate pairs (code points >= U+10000), written as one reference;
///   - BMP characters in the range U+2000..U+2BFF (inclusive).
///
/// Everything else, including unpaired surrogates, is copied through unchanged.
/// A null or empty `text` is returned as-is. When no character needs encoding
/// the original string instance is returned and no StringBuilder is allocated.
let internal encodeHighUnicode (text: string) =
    if String.IsNullOrEmpty text then
        text
    else
        // Created lazily on the first character that needs encoding, so the
        // common "nothing to encode" case does a single scan with no allocation.
        let mutable sb: System.Text.StringBuilder voption = ValueNone
        let mutable i = 0

        while i < text.Length do
            let c = text.[i]

            // A high surrogate only counts when it is immediately followed by a
            // low surrogate; a lone surrogate falls through to the plain-char path.
            let isSurrogatePair =
                Char.IsHighSurrogate c
                && i + 1 < text.Length
                && Char.IsLowSurrogate text.[i + 1]

            let codePoint =
                if isSurrogatePair then
                    Char.ConvertToUtf32(c, text.[i + 1])
                else
                    int c

            // All supplementary-plane characters are encoded, plus U+2000..U+2BFF
            // (General Punctuation, Currency, Arrows, Math, Technical, Box Drawing,
            // Dingbats, etc.).
            if isSurrogatePair || (codePoint >= 0x2000 && codePoint <= 0x2BFF) then
                let builder =
                    match sb with
                    | ValueSome existing -> existing
                    | ValueNone ->
                        let created = System.Text.StringBuilder(text.Length + 16)

                        // Carry over the untouched prefix scanned so far.
                        if i > 0 then
                            created.Append(text, 0, i) |> ignore

                        sb <- ValueSome created
                        created

                // Built piecewise instead of via sprintf to avoid a temporary string.
                builder.Append("&#").Append(codePoint).Append(';') |> ignore
            else
                // Before the first encoded character there is nothing to copy:
                // the prefix is bulk-appended when the builder is created.
                match sb with
                | ValueSome builder -> builder.Append c |> ignore
                | ValueNone -> ()

            // A surrogate pair occupies two UTF-16 code units.
            i <- i + (if isSurrogatePair then 2 else 1)

        match sb with
        | ValueNone -> text
        | ValueSome builder -> builder.ToString()
6985
7086/// Basic escaping as done by Markdown including quotes
7187let internal htmlEncodeQuotes ( code : string ) =
0 commit comments