Skip to content

Commit ce2fa53

Browse files
authored
[MR] markdown render with html sanitizer (#10300)
* Merged PR 2525: Introduce html Sanitizer
* Merged PR 2534: Sign third party packages
1 parent 3e6294d commit ce2fa53

File tree

8 files changed

+77
-37
lines changed

8 files changed

+77
-37
lines changed

Directory.Packages.props

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
<PackageVersion Include="EntityFramework" Version="6.4.4" />
2929
<PackageVersion Include="FluentAssertions" Version="5.5.0" />
3030
<PackageVersion Include="FluentLinkChecker" Version="1.0.0.10" />
31+
<PackageVersion Include="HtmlSanitizer" Version="8.1.870" />
3132
<PackageVersion Include="Knapcode.MiniZip" Version="0.20.0" />
3233
<PackageVersion Include="LibGit2Sharp" Version="0.26.0" />
3334
<PackageVersion Include="Lucene.Net.Contrib" Version="3.0.3" />

NuGet.config

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,8 @@
1212
<clear />
1313
<packageSource key="NuGet.org">
1414
<package pattern="Antlr" />
15+
<package pattern="AngleSharp.*" />
16+
<package pattern="AngleSharp" />
1517
<package pattern="Autofac.*" />
1618
<package pattern="Autofac" />
1719
<package pattern="Azure.*" />
@@ -26,6 +28,7 @@
2628
<package pattern="FluentAssertions" />
2729
<package pattern="FluentLinkChecker" />
2830
<package pattern="HtmlAgilityPack" />
31+
<package pattern="HtmlSanitizer" />
2932
<package pattern="Hyak.Common" />
3033
<package pattern="Knapcode.MiniZip" />
3134
<package pattern="LibGit2Sharp.NativeBinaries" />

sign.thirdparty.props

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
<Project>
22
<ItemGroup>
3+
<ThirdPartyBinaries Include="AngleSharp.dll" />
4+
<ThirdPartyBinaries Include="AngleSharp.Css.dll" />
35
<ThirdPartyBinaries Include="AnglicanGeek.MarkdownMailer.dll" />
46
<ThirdPartyBinaries Include="Antlr3.Runtime.dll" />
57
<ThirdPartyBinaries Include="Autofac.dll" />
@@ -16,6 +18,7 @@
1618
<ThirdPartyBinaries Include="Elmah.dll" />
1719
<ThirdPartyBinaries Include="git2-572e4d8.dll" />
1820
<ThirdPartyBinaries Include="HtmlAgilityPack.dll" />
21+
<ThirdPartyBinaries Include="HtmlSanitizer.dll" />
1922
<ThirdPartyBinaries Include="ICSharpCode.SharpZipLib.dll" />
2023
<ThirdPartyBinaries Include="json-ld.net.StrongName.dll" />
2124
<ThirdPartyBinaries Include="Knapcode.MiniZip.dll" />

src/NuGetGallery/App_Start/DefaultDependenciesModule.cs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
using Autofac;
2121
using Autofac.Core;
2222
using Autofac.Extensions.DependencyInjection;
23+
using Ganss.Xss;
2324
using Microsoft.ApplicationInsights.Extensibility;
2425
using Microsoft.ApplicationInsights.Extensibility.Implementation;
2526
using Microsoft.Extensions.DependencyInjection;
@@ -132,6 +133,7 @@ protected override void Load(ContainerBuilder builder)
132133

133134
services.AddSingleton(loggerFactory);
134135
services.AddSingleton(typeof(ILogger<>), typeof(Logger<>));
136+
services.AddSingleton<IHtmlSanitizer, HtmlSanitizer>();
135137

136138
UrlHelperExtensions.SetConfigurationService(configuration);
137139
builder.RegisterType<UrlHelperWrapper>()

src/NuGetGallery/NuGetGallery.csproj

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2204,6 +2204,7 @@
22042204
<PackageReference Include="NuGet.StrongName.elmah.sqlserver" />
22052205
<PackageReference Include="NuGet.StrongName.elmah" />
22062206
<PackageReference Include="EntityFramework" />
2207+
<PackageReference Include="HtmlSanitizer" />
22072208
<PackageReference Include="Lucene.Net" />
22082209
<PackageReference Include="Lucene.Net.Contrib" />
22092210
<PackageReference Include="Microsoft.ApplicationInsights.TraceListener" />

src/NuGetGallery/Services/MarkdownService.cs

Lines changed: 31 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// Copyright (c) .NET Foundation. All rights reserved.
1+
// Copyright (c) .NET Foundation. All rights reserved.
22
// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information.
33

44
using System;
@@ -7,6 +7,7 @@
77
using System.Web;
88
using CommonMark;
99
using CommonMark.Syntax;
10+
using Ganss.Xss;
1011
using Markdig;
1112
using Markdig.Extensions.EmphasisExtras;
1213
using Markdig.Renderers;
@@ -20,19 +21,38 @@ public class MarkdownService : IMarkdownService
2021
private static readonly TimeSpan RegexTimeout = TimeSpan.FromMinutes(1);
2122
private static readonly Regex EncodedBlockQuotePattern = new Regex("^ {0,3}&gt;", RegexOptions.Multiline, RegexTimeout);
2223
private static readonly Regex LinkPattern = new Regex("<a href=([\"\']).*?\\1", RegexOptions.None, RegexTimeout);
23-
private static readonly Regex JavaScriptPattern = new Regex("<a href=([\"\'])javascript:.*?\\1 rel=([\"'])noopener noreferrer nofollow\\1>", RegexOptions.None, RegexTimeout);
2424
private static readonly Regex HtmlCommentPattern = new Regex("<!--.*?-->", RegexOptions.Singleline, RegexTimeout);
2525
private static readonly Regex ImageTextPattern = new Regex("!\\[\\]\\(", RegexOptions.Singleline, RegexTimeout);
26-
private static readonly string altTextForImage = "alternate text is missing from this package README image";
26+
private static readonly string AltTextForImage = "alternate text is missing from this package README image";
2727

2828
private readonly IFeatureFlagService _features;
2929
private readonly IImageDomainValidator _imageDomainValidator;
30+
private readonly IHtmlSanitizer _htmlSanitizer;
3031

3132
public MarkdownService(IFeatureFlagService features,
32-
IImageDomainValidator imageDomainValidator)
33+
IImageDomainValidator imageDomainValidator,
34+
IHtmlSanitizer htmlSanitizer)
3335
{
3436
_features = features ?? throw new ArgumentNullException(nameof(features));
3537
_imageDomainValidator = imageDomainValidator ?? throw new ArgumentNullException(nameof(imageDomainValidator));
38+
_htmlSanitizer = htmlSanitizer ?? throw new ArgumentNullException(nameof(htmlSanitizer));
39+
SanitizerSettings();
40+
}
41+
42+
private void SanitizerSettings()
43+
{
44+
// Allow the "id" and "class" attributes in addition to whatever the injected sanitizer already permits; no tags are configured here.
45+
_htmlSanitizer.AllowedAttributes.Add("id");
46+
_htmlSanitizer.AllowedAttributes.Add("class");
47+
}
48+
49+
private string SanitizeText(string input)
50+
{
51+
if (!string.IsNullOrWhiteSpace(input))
52+
{
53+
return _htmlSanitizer.Sanitize(input);
54+
}
55+
return input;
3656
}
3757

3858
public RenderedMarkdownResult GetHtmlFromMarkdown(string markdownString)
@@ -42,6 +62,7 @@ public RenderedMarkdownResult GetHtmlFromMarkdown(string markdownString)
4262
throw new ArgumentNullException(nameof(markdownString));
4363
}
4464

65+
4566
if (_features.IsMarkdigMdRenderingEnabled())
4667
{
4768
return GetHtmlFromMarkdownMarkdig(markdownString, 1);
@@ -179,7 +200,9 @@ private RenderedMarkdownResult GetHtmlFromMarkdownCommonMark(string markdownStri
179200
using (var htmlWriter = new StringWriter())
180201
{
181202
CommonMarkConverter.ProcessStage3(document, htmlWriter, settings);
182-
output.Content = LinkPattern.Replace(htmlWriter.ToString(), "$0" + " rel=\"noopener noreferrer nofollow\"").Trim();
203+
string htmlContent = htmlWriter.ToString();
204+
htmlContent = SanitizeText(htmlContent);
205+
output.Content = LinkPattern.Replace(htmlContent, "$0" + " rel=\"noopener noreferrer nofollow\"").Trim();
183206

184207
return output;
185208
}
@@ -197,7 +220,7 @@ private RenderedMarkdownResult GetHtmlFromMarkdownMarkdig(string markdownString,
197220

198221
var markdownWithoutComments = HtmlCommentPattern.Replace(markdownString, "");
199222

200-
var markdownWithImageAlt = ImageTextPattern.Replace(markdownWithoutComments, $"![{altTextForImage}](");
223+
var markdownWithImageAlt = ImageTextPattern.Replace(markdownWithoutComments, $"![{AltTextForImage}](");
201224

202225
var markdownWithoutBom = markdownWithImageAlt.TrimStart('\ufeff');
203226

@@ -286,10 +309,10 @@ private RenderedMarkdownResult GetHtmlFromMarkdownMarkdig(string markdownString,
286309
renderer.Render(document);
287310
output.Content = htmlWriter.ToString().Trim();
288311
output.IsMarkdigMdSyntaxHighlightEnabled = _features.IsMarkdigMdSyntaxHighlightEnabled();
289-
output.Content = JavaScriptPattern.Replace(htmlWriter.ToString(), "").Trim();
312+
output.Content = SanitizeText(output.Content);
290313

291314
return output;
292315
}
293316
}
294317
}
295-
}
318+
}

src/NuGetGallery/Web.config

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -738,6 +738,10 @@
738738
<assemblyIdentity name="Autofac" publicKeyToken="17863AF14B0044DA" culture="neutral"/>
739739
<bindingRedirect oldVersion="0.0.0.0-4.9.1.0" newVersion="4.9.1.0"/>
740740
</dependentAssembly>
741+
<dependentAssembly>
742+
<assemblyIdentity name="AngleSharp" publicKeyToken="e83494dcdc6d31ea" culture="neutral"/>
743+
<bindingRedirect oldVersion="0.0.0.0-0.17.1.0" newVersion="0.17.1.0"/>
744+
</dependentAssembly>
741745
</assemblyBinding>
742746
</runtime>
743747
</configuration>

0 commit comments

Comments
 (0)