1- // Copyright (c) .NET Foundation. All rights reserved.
1+ // Copyright (c) .NET Foundation. All rights reserved.
22// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information.
33
44using System ;
77using System . Web ;
88using CommonMark ;
99using CommonMark . Syntax ;
10+ using Ganss . Xss ;
1011using Markdig ;
1112using Markdig . Extensions . EmphasisExtras ;
1213using Markdig . Renderers ;
@@ -20,19 +21,38 @@ public class MarkdownService : IMarkdownService
// Match timeout (one minute) passed to every Regex constructed below.
2021 private static readonly TimeSpan RegexTimeout = TimeSpan . FromMinutes ( 1 ) ;
// Multiline pattern: as written here, a line start followed by up to three spaces and then '>'.
2122 private static readonly Regex EncodedBlockQuotePattern = new Regex ( "^ {0,3}>" , RegexOptions . Multiline , RegexTimeout ) ;
// Matches an opening anchor's quoted href attribute; the CommonMark rendering path appends a rel attribute right after each match.
2223 private static readonly Regex LinkPattern = new Regex ( "<a href=([\" \' ]).*?\\ 1" , RegexOptions . None , RegexTimeout ) ;
23- private static readonly Regex JavaScriptPattern = new Regex ( "<a href=([\" \' ])javascript:.*?\\ 1 rel=([\" '])noopener noreferrer nofollow\\ 1>" , RegexOptions . None , RegexTimeout ) ;
// Non-greedy match of an HTML comment; Singleline lets '.' cross line breaks. The Markdig path uses it to remove comments from the markdown input.
2424 private static readonly Regex HtmlCommentPattern = new Regex ( "<!--.*?-->" , RegexOptions . Singleline , RegexTimeout ) ;
// NOTE(review): presumably matches markdown image syntax with empty alt text ("![](") - the literal is whitespace-mangled in this view, confirm against the real file.
2525 private static readonly Regex ImageTextPattern = new Regex ( "!\\ [\\ ]\\ (" , RegexOptions . Singleline , RegexTimeout ) ;
26- private static readonly string altTextForImage = "alternate text is missing from this package README image" ;
// Placeholder alt text; presumably substituted by the ImageTextPattern replacement (the replacement string is truncated in this view).
26+ private static readonly string AltTextForImage = "alternate text is missing from this package README image" ;
2727
// Collaborators supplied through the constructor, which rejects null for each of them.
2828 private readonly IFeatureFlagService _features ;
2929 private readonly IImageDomainValidator _imageDomainValidator ;
30+ private readonly IHtmlSanitizer _htmlSanitizer ;
3031
/// <summary>
/// Creates the markdown service from its collaborators and then applies this
/// service's sanitizer configuration (see <see cref="SanitizerSettings"/>).
/// </summary>
/// <param name="features">Feature flag service; consulted to decide whether Markdig rendering is enabled.</param>
/// <param name="imageDomainValidator">Image domain validator stored for later use by this service.</param>
/// <param name="htmlSanitizer">HTML sanitizer applied to rendered markdown output.</param>
/// <exception cref="ArgumentNullException">Thrown when any of the arguments is <c>null</c>.</exception>
public MarkdownService(
    IFeatureFlagService features,
    IImageDomainValidator imageDomainValidator,
    IHtmlSanitizer htmlSanitizer)
{
    // Guards run in parameter order so the first null argument is the one reported.
    if (features == null)
    {
        throw new ArgumentNullException(nameof(features));
    }

    if (imageDomainValidator == null)
    {
        throw new ArgumentNullException(nameof(imageDomainValidator));
    }

    if (htmlSanitizer == null)
    {
        throw new ArgumentNullException(nameof(htmlSanitizer));
    }

    _features = features;
    _imageDomainValidator = imageDomainValidator;
    _htmlSanitizer = htmlSanitizer;

    // The sanitizer field must be assigned before it is configured.
    SanitizerSettings();
}
41+
/// <summary>
/// Adds the <c>id</c> and <c>class</c> attributes to the injected sanitizer's
/// set of allowed attributes.
/// </summary>
private void SanitizerSettings()
{
    // Attributes this service permits in addition to whatever the sanitizer already allows.
    var extraAttributes = new[] { "id", "class" };

    foreach (var attributeName in extraAttributes)
    {
        _htmlSanitizer.AllowedAttributes.Add(attributeName);
    }
}
48+
/// <summary>
/// Runs <paramref name="input"/> through the HTML sanitizer.
/// </summary>
/// <param name="input">HTML text to sanitize; may be <c>null</c>, empty, or whitespace.</param>
/// <returns>
/// The sanitizer's output, or <paramref name="input"/> itself (unchanged) when it is
/// <c>null</c>, empty, or consists only of whitespace.
/// </returns>
private string SanitizeText(string input)
{
    // Blank input is passed straight through without invoking the sanitizer.
    return string.IsNullOrWhiteSpace(input)
        ? input
        : _htmlSanitizer.Sanitize(input);
}
3757
3858 public RenderedMarkdownResult GetHtmlFromMarkdown ( string markdownString )
@@ -42,6 +62,7 @@ public RenderedMarkdownResult GetHtmlFromMarkdown(string markdownString)
4262 throw new ArgumentNullException ( nameof ( markdownString ) ) ;
4363 }
4464
65+
4566 if ( _features . IsMarkdigMdRenderingEnabled ( ) )
4667 {
4768 return GetHtmlFromMarkdownMarkdig ( markdownString , 1 ) ;
@@ -179,7 +200,9 @@ private RenderedMarkdownResult GetHtmlFromMarkdownCommonMark(string markdownStri
179200 using ( var htmlWriter = new StringWriter ( ) )
180201 {
181202 CommonMarkConverter . ProcessStage3 ( document , htmlWriter , settings ) ;
182- output . Content = LinkPattern . Replace ( htmlWriter . ToString ( ) , "$0" + " rel=\" noopener noreferrer nofollow\" " ) . Trim ( ) ;
203+ string htmlContent = htmlWriter . ToString ( ) ;
204+ htmlContent = SanitizeText ( htmlContent ) ;
205+ output . Content = LinkPattern . Replace ( htmlContent , "$0" + " rel=\" noopener noreferrer nofollow\" " ) . Trim ( ) ;
183206
184207 return output ;
185208 }
@@ -197,7 +220,7 @@ private RenderedMarkdownResult GetHtmlFromMarkdownMarkdig(string markdownString,
197220
198221 var markdownWithoutComments = HtmlCommentPattern . Replace ( markdownString , "" ) ;
199222
200- var markdownWithImageAlt = ImageTextPattern . Replace ( markdownWithoutComments , $ " ;
223+ var markdownWithImageAlt = ImageTextPattern . Replace ( markdownWithoutComments , $ " ;
201224
202225 var markdownWithoutBom = markdownWithImageAlt . TrimStart ( '\ufeff ' ) ;
203226
@@ -286,10 +309,10 @@ private RenderedMarkdownResult GetHtmlFromMarkdownMarkdig(string markdownString,
286309 renderer . Render ( document ) ;
287310 output . Content = htmlWriter . ToString ( ) . Trim ( ) ;
288311 output . IsMarkdigMdSyntaxHighlightEnabled = _features . IsMarkdigMdSyntaxHighlightEnabled ( ) ;
289- output . Content = JavaScriptPattern . Replace ( htmlWriter . ToString ( ) , "" ) . Trim ( ) ;
312+ output . Content = SanitizeText ( output . Content ) ;
290313
291314 return output ;
292315 }
293316 }
294317 }
295- }
318+ }
0 commit comments