From 2b1a915eadbf26320f2c0baf988abc434102816d Mon Sep 17 00:00:00 2001 From: Bond-009 Date: Thu, 5 May 2022 19:59:17 +0200 Subject: [PATCH] Merge pull request #7604 from Jellifi007/fixes-diactritics Co-authored-by: Cody Robibero (cherry picked from commit 8d1d9734381472b301deb0118bbb8da2a769a65e) Signed-off-by: crobibero --- Emby.Dlna/Service/BaseControlHandler.cs | 2 +- .../Data/SqliteItemRepository.cs | 1 - .../Library/SearchEngine.cs | 2 +- Jellyfin.Drawing.Skia/SkiaEncoder.cs | 2 +- .../Entities/Audio/MusicArtist.cs | 2 +- .../Entities/Audio/MusicGenre.cs | 2 +- MediaBrowser.Controller/Entities/BaseItem.cs | 1 - MediaBrowser.Controller/Entities/Genre.cs | 2 +- MediaBrowser.Controller/Entities/Person.cs | 2 +- MediaBrowser.Controller/Entities/Studio.cs | 2 +- .../Library/NameExtensions.cs | 2 +- .../MediaBrowser.Controller.csproj | 1 - .../Manager/MetadataService.cs | 2 +- .../MusicBrainz/MusicBrainzArtistProvider.cs | 2 +- src/Jellyfin.Extensions/StringExtensions.cs | 42 +++++++++++++++++++ .../StringExtensionsTests.cs | 32 ++++++++++++++ 16 files changed, 85 insertions(+), 14 deletions(-) diff --git a/Emby.Dlna/Service/BaseControlHandler.cs b/Emby.Dlna/Service/BaseControlHandler.cs index 7bec2eb728..9c423b3958 100644 --- a/Emby.Dlna/Service/BaseControlHandler.cs +++ b/Emby.Dlna/Service/BaseControlHandler.cs @@ -6,8 +6,8 @@ using System.IO; using System.Text; using System.Threading.Tasks; using System.Xml; -using Diacritics.Extensions; using Emby.Dlna.Didl; +using Jellyfin.Extensions; using MediaBrowser.Controller.Configuration; using Microsoft.Extensions.Logging; diff --git a/Emby.Server.Implementations/Data/SqliteItemRepository.cs b/Emby.Server.Implementations/Data/SqliteItemRepository.cs index cdc14c2605..6c243050d4 100644 --- a/Emby.Server.Implementations/Data/SqliteItemRepository.cs +++ b/Emby.Server.Implementations/Data/SqliteItemRepository.cs @@ -11,7 +11,6 @@ using System.Linq; using System.Text; using System.Text.Json; using System.Threading; -using Diacritics.Extensions; using Emby.Server.Implementations.Playlists; using Jellyfin.Data.Enums; using Jellyfin.Extensions; diff --git a/Emby.Server.Implementations/Library/SearchEngine.cs b/Emby.Server.Implementations/Library/SearchEngine.cs index 96702d1520..60778a443e 100644 --- a/Emby.Server.Implementations/Library/SearchEngine.cs +++ b/Emby.Server.Implementations/Library/SearchEngine.cs @@ -5,9 +5,9 @@ using System; using System.Collections.Generic; using System.Linq; -using Diacritics.Extensions; using Jellyfin.Data.Entities; using Jellyfin.Data.Enums; +using Jellyfin.Extensions; using MediaBrowser.Controller.Dto; using MediaBrowser.Controller.Entities; using MediaBrowser.Controller.Library; diff --git a/Jellyfin.Drawing.Skia/SkiaEncoder.cs b/Jellyfin.Drawing.Skia/SkiaEncoder.cs index 1fa8e570da..2358fe6238 100644 --- a/Jellyfin.Drawing.Skia/SkiaEncoder.cs +++ b/Jellyfin.Drawing.Skia/SkiaEncoder.cs @@ -3,7 +3,7 @@ using System.Collections.Generic; using System.Globalization; using System.IO; using BlurHashSharp.SkiaSharp; -using Diacritics.Extensions; +using Jellyfin.Extensions; using MediaBrowser.Common.Configuration; using MediaBrowser.Common.Extensions; using MediaBrowser.Controller.Drawing; diff --git a/MediaBrowser.Controller/Entities/Audio/MusicArtist.cs b/MediaBrowser.Controller/Entities/Audio/MusicArtist.cs index 0f2d7e62dc..15a79fa1fc 100644 --- a/MediaBrowser.Controller/Entities/Audio/MusicArtist.cs +++ b/MediaBrowser.Controller/Entities/Audio/MusicArtist.cs @@ -8,9 +8,9 @@ using System.Linq; using System.Text.Json.Serialization; using System.Threading; using System.Threading.Tasks; -using Diacritics.Extensions; using Jellyfin.Data.Entities; using Jellyfin.Data.Enums; +using Jellyfin.Extensions; using MediaBrowser.Controller.Providers; using MediaBrowser.Model.Entities; using Microsoft.Extensions.Logging; diff --git a/MediaBrowser.Controller/Entities/Audio/MusicGenre.cs b/MediaBrowser.Controller/Entities/Audio/MusicGenre.cs index 73a25232e4..7448d02ea5 100644 --- a/MediaBrowser.Controller/Entities/Audio/MusicGenre.cs +++ b/MediaBrowser.Controller/Entities/Audio/MusicGenre.cs @@ -5,8 +5,8 @@ using System; using System.Collections.Generic; using System.Text.Json.Serialization; -using Diacritics.Extensions; using Jellyfin.Data.Enums; +using Jellyfin.Extensions; using Microsoft.Extensions.Logging; namespace MediaBrowser.Controller.Entities.Audio diff --git a/MediaBrowser.Controller/Entities/BaseItem.cs b/MediaBrowser.Controller/Entities/BaseItem.cs index 2bb966d2c5..5cee6ce406 100644 --- a/MediaBrowser.Controller/Entities/BaseItem.cs +++ b/MediaBrowser.Controller/Entities/BaseItem.cs @@ -11,7 +11,6 @@ using System.Text; using System.Text.Json.Serialization; using System.Threading; using System.Threading.Tasks; -using Diacritics.Extensions; using Jellyfin.Data.Entities; using Jellyfin.Data.Enums; using Jellyfin.Extensions; diff --git a/MediaBrowser.Controller/Entities/Genre.cs b/MediaBrowser.Controller/Entities/Genre.cs index 4be6732372..ddf62dd4cb 100644 --- a/MediaBrowser.Controller/Entities/Genre.cs +++ b/MediaBrowser.Controller/Entities/Genre.cs @@ -5,8 +5,8 @@ using System; using System.Collections.Generic; using System.Text.Json.Serialization; -using Diacritics.Extensions; using Jellyfin.Data.Enums; +using Jellyfin.Extensions; using Microsoft.Extensions.Logging; namespace MediaBrowser.Controller.Entities diff --git a/MediaBrowser.Controller/Entities/Person.cs b/MediaBrowser.Controller/Entities/Person.cs index 045c1b89fd..7f265084fb 100644 --- a/MediaBrowser.Controller/Entities/Person.cs +++ b/MediaBrowser.Controller/Entities/Person.cs @@ -5,7 +5,7 @@ using System; using System.Collections.Generic; using System.Text.Json.Serialization; -using Diacritics.Extensions; +using Jellyfin.Extensions; using MediaBrowser.Controller.Providers; using Microsoft.Extensions.Logging; diff --git a/MediaBrowser.Controller/Entities/Studio.cs b/MediaBrowser.Controller/Entities/Studio.cs index c8feb1c946..a3736a4bfc 100644 --- a/MediaBrowser.Controller/Entities/Studio.cs +++ b/MediaBrowser.Controller/Entities/Studio.cs @@ -5,7 +5,7 @@ using System; using System.Collections.Generic; using System.Text.Json.Serialization; -using Diacritics.Extensions; +using Jellyfin.Extensions; using Microsoft.Extensions.Logging; namespace MediaBrowser.Controller.Entities diff --git a/MediaBrowser.Controller/Library/NameExtensions.cs b/MediaBrowser.Controller/Library/NameExtensions.cs index d2ed3465a8..9d78b8b6c8 100644 --- a/MediaBrowser.Controller/Library/NameExtensions.cs +++ b/MediaBrowser.Controller/Library/NameExtensions.cs @@ -3,7 +3,7 @@ using System; using System.Collections.Generic; using System.Linq; -using Diacritics.Extensions; +using Jellyfin.Extensions; namespace MediaBrowser.Controller.Library { diff --git a/MediaBrowser.Controller/MediaBrowser.Controller.csproj b/MediaBrowser.Controller/MediaBrowser.Controller.csproj index 3cab4366d3..2368706fe1 100644 --- a/MediaBrowser.Controller/MediaBrowser.Controller.csproj +++ b/MediaBrowser.Controller/MediaBrowser.Controller.csproj @@ -18,7 +18,6 @@ - diff --git a/MediaBrowser.Providers/Manager/MetadataService.cs b/MediaBrowser.Providers/Manager/MetadataService.cs index 0c52d26736..6d767914f7 100644 --- a/MediaBrowser.Providers/Manager/MetadataService.cs +++ b/MediaBrowser.Providers/Manager/MetadataService.cs @@ -8,7 +8,7 @@ using System.Linq; using System.Net.Http; using System.Threading; using System.Threading.Tasks; -using Diacritics.Extensions; +using Jellyfin.Extensions; using MediaBrowser.Controller.Configuration; using MediaBrowser.Controller.Entities; using MediaBrowser.Controller.Entities.Audio; diff --git a/MediaBrowser.Providers/Plugins/MusicBrainz/MusicBrainzArtistProvider.cs b/MediaBrowser.Providers/Plugins/MusicBrainz/MusicBrainzArtistProvider.cs index 1feb7f4eab..906a42f36d 100644 --- a/MediaBrowser.Providers/Plugins/MusicBrainz/MusicBrainzArtistProvider.cs +++ b/MediaBrowser.Providers/Plugins/MusicBrainz/MusicBrainzArtistProvider.cs @@ -13,7 +13,7 @@ using System.Text; using System.Threading; using System.Threading.Tasks; using System.Xml; -using Diacritics.Extensions; +using Jellyfin.Extensions; using MediaBrowser.Controller.Entities.Audio; using MediaBrowser.Controller.Providers; using MediaBrowser.Model.Entities; diff --git a/src/Jellyfin.Extensions/StringExtensions.cs b/src/Jellyfin.Extensions/StringExtensions.cs index 3a77072539..dadc9f1d5c 100644 --- a/src/Jellyfin.Extensions/StringExtensions.cs +++ b/src/Jellyfin.Extensions/StringExtensions.cs @@ -1,4 +1,8 @@ using System; +using System.Diagnostics; +using System.Globalization; +using System.Text; +using System.Text.RegularExpressions; namespace Jellyfin.Extensions { @@ -7,6 +11,44 @@ namespace Jellyfin.Extensions /// public static class StringExtensions { + // Matches non-conforming unicode chars + // https://mnaoumov.wordpress.com/2014/06/14/stripping-invalid-characters-from-utf-16-strings/ + private static readonly Regex _nonConformingUnicode = new Regex("([\ud800-\udbff](?![\udc00-\udfff]))|((? + /// Removes the diacritics character from the strings. + /// + /// The string to act on. + /// The string without diacritics character. + public static string RemoveDiacritics(this string text) + { + string withDiactritics = _nonConformingUnicode + .Replace(text, string.Empty) + .Normalize(NormalizationForm.FormD); + + var withoutDiactritics = new StringBuilder(); + foreach (char c in withDiactritics) + { + UnicodeCategory uc = CharUnicodeInfo.GetUnicodeCategory(c); + if (uc != UnicodeCategory.NonSpacingMark) + { + withoutDiactritics.Append(c); + } + } + + return withoutDiactritics.ToString().Normalize(NormalizationForm.FormC); + } + + /// + /// Checks wether or not the specified string has diacritics in it. + /// + /// The string to check. + /// True if the string has diacritics, false otherwise. + public static bool HasDiacritics(this string text) + { + return !string.Equals(text, text.RemoveDiacritics(), StringComparison.Ordinal); + } + /// /// Counts the number of occurrences of [needle] in the string. /// diff --git a/tests/Jellyfin.Extensions.Tests/StringExtensionsTests.cs b/tests/Jellyfin.Extensions.Tests/StringExtensionsTests.cs index 7186cc0236..903d88caa1 100644 --- a/tests/Jellyfin.Extensions.Tests/StringExtensionsTests.cs +++ b/tests/Jellyfin.Extensions.Tests/StringExtensionsTests.cs @@ -5,6 +5,38 @@ namespace Jellyfin.Extensions.Tests { public class StringExtensionsTests { + [Theory] + [InlineData("", "")] // Identity edge-case (no diactritics) + [InlineData("Indiana Jones", "Indiana Jones")] // Identity (no diactritics) + [InlineData("a\ud800b", "ab")] // Invalid UTF-16 char stripping + [InlineData("Jön", "Jon")] // Issue #7484 + [InlineData("Jönssonligan", "Jonssonligan")] // Issue #7484 + [InlineData("Kieślowski", "Kieslowski")] // Issue #7450 + [InlineData("Cidadão Kane", "Cidadao Kane")] // Issue #7560 + [InlineData("운명처럼 널 사랑해", "운명처럼 널 사랑해")] // Issue #6393 (Korean language support) + [InlineData("애타는 로맨스", "애타는 로맨스")] // Issue #6393 + public void RemoveDiacritics_ValidInput_Corrects(string input, string expectedResult) + { + string result = input.RemoveDiacritics(); + Assert.Equal(expectedResult, result); + } + + [Theory] + [InlineData("", false)] // Identity edge-case (no diactritics) + [InlineData("Indiana Jones", false)] // Identity (no diactritics) + [InlineData("a\ud800b", true)] // Invalid UTF-16 char stripping + [InlineData("Jön", true)] // Issue #7484 + [InlineData("Jönssonligan", true)] // Issue #7484 + [InlineData("Kieślowski", true)] // Issue #7450 + [InlineData("Cidadão Kane", true)] // Issue #7560 + [InlineData("운명처럼 널 사랑해", false)] // Issue #6393 (Korean language support) + [InlineData("애타는 로맨스", false)] // Issue #6393 + public void HasDiacritics_ValidInput_Corrects(string input, bool expectedResult) + { + bool result = input.HasDiacritics(); + Assert.Equal(expectedResult, result); + } + [Theory] [InlineData("", '_', 0)] [InlineData("___", '_', 3)]