From 945930d75e60d57bc17e8de0d7988c36a84cf284 Mon Sep 17 00:00:00 2001 From: cxfksword <718792+cxfksword@users.noreply.github.com> Date: Sat, 25 May 2024 16:00:19 +0800 Subject: [PATCH] fix: episode number not correctly identified. #82 --- AnitomySharp/Keyword.cs | 4 +- AnitomySharp/ParserHelper.cs | 31 +++++++++++-- AnitomySharp/ParserNumber.cs | 29 +++++++++++-- .../ParseNameTest.cs | 43 +++++++++++++------ Jellyfin.Plugin.MetaShark/Core/NameParser.cs | 22 ++++++++-- .../Core/StringExtension.cs | 5 +++ .../Providers/EpisodeProvider.cs | 10 ++--- 7 files changed, 113 insertions(+), 31 deletions(-) diff --git a/AnitomySharp/Keyword.cs b/AnitomySharp/Keyword.cs index a0adcae..f5bab41 100644 --- a/AnitomySharp/Keyword.cs +++ b/AnitomySharp/Keyword.cs @@ -67,7 +67,7 @@ static KeywordManager() "番外編", "總集編","DRAMA", "映像特典","特典","特典アニメ", // 特典 Special 剩下的各种类型可以全部命名成 SP,对于较特殊意义的特典也可以自定义命名 - "SPECIAL", "SPECIALS", "SP", "SPs", + "SPECIAL", "SPECIALS", "SP", "SPs", "特報", // 真人特典 Interview/Talk/Stage... 目前我们对于节目、采访、舞台活动、制作等三次元画面的长视频,一概怼成 IV。 "IV", // 音乐视频 Music Video @@ -85,7 +85,7 @@ static KeywordManager() // 无字 OP/ED Non-Credit Opening/Ending "ED", "ENDING", "NCED", "NCOP", "OP", "OPENING", // 预告 Preview 预告下一话内容 注意编号表示其预告的是第几话的内容而不是跟在哪一话后面 - "PREVIEW", "YOKOKU", + "PREVIEW", "YOKOKU", "予告", // 菜单 Menu BD/DVD 播放选择菜单 "MENU", // 广告 Commercial Message 电视放送广告,时长一般在 7s/15s/30s/45s/... 左右 diff --git a/AnitomySharp/ParserHelper.cs b/AnitomySharp/ParserHelper.cs index 904666e..1b6f1f5 100644 --- a/AnitomySharp/ParserHelper.cs +++ b/AnitomySharp/ParserHelper.cs @@ -119,6 +119,29 @@ public static string GetNumberFromOrdinal(string str) if (string.IsNullOrEmpty(str)) return ""; return Ordinals.TryGetValue(str, out var foundString) ? foundString : ""; } + /// + /// 转换原始值中的全角数字 + /// 1234567890 + /// + /// + /// + public static string GetNumberFromFullWidth(string str) + { + string output = str; + for (int i = 0; i < str.Length; i++) + { + if (char.IsDigit(str[i])) + { + int fullwidthDigit = (int)str[i]; + if (fullwidthDigit >= 65296 && fullwidthDigit <= 65305) + { + int halfwidthDigit = fullwidthDigit - 65248; + output = output.Replace(str[i], (char)halfwidthDigit); + } + } + } + return output; + } /// /// Returns the index of the first digit in the str; -1 otherwise. @@ -273,7 +296,7 @@ public bool IsNextTokenContainAnimeType(int pos) var prevToken = Token.FindPrevToken(_parser.Tokens, pos, Token.TokenFlag.FlagNotDelimiter); if (!IsTokenCategory(prevToken, Token.TokenCategory.Bracket)) return false; var nextToken = Token.FindNextToken(_parser.Tokens, pos, Token.TokenFlag.FlagNotDelimiter); - if (nextToken < 0) return false; + if (!Token.InListRange(prevToken, _parser.Tokens) || !Token.InListRange(nextToken, _parser.Tokens)) return false; return KeywordManager.Contains(Element.ElementCategory.ElementAnimeType, _parser.Tokens[nextToken].Content); } /// @@ -285,8 +308,8 @@ public bool IsPrevTokenContainAnimeType(int pos) { var prevToken = Token.FindPrevToken(_parser.Tokens, pos, Token.TokenFlag.FlagNotDelimiter); var nextToken = Token.FindNextToken(_parser.Tokens, pos, Token.TokenFlag.FlagNotDelimiter); + if(!Token.InListRange(prevToken, _parser.Tokens)||!Token.InListRange(nextToken, _parser.Tokens)) return false; if (!IsTokenCategory(nextToken, Token.TokenCategory.Bracket)) return false; - if (prevToken < 0) return false; return KeywordManager.Contains(Element.ElementCategory.ElementAnimeType, _parser.Tokens[prevToken].Content); } /// @@ -298,8 +321,8 @@ public bool IsPrevTokenContainAnimeTypeInPeekEntries(int pos) { var prevToken = Token.FindPrevToken(_parser.Tokens, pos, Token.TokenFlag.FlagNotDelimiter); var nextToken = Token.FindNextToken(_parser.Tokens, pos, Token.TokenFlag.FlagNotDelimiter); + if (!Token.InListRange(prevToken, _parser.Tokens) || !Token.InListRange(nextToken, _parser.Tokens)) return false; if (!IsTokenCategory(nextToken, Token.TokenCategory.Bracket)) return false; - if (prevToken < 0) return false; return KeywordManager.ContainsInPeekEntries(Element.ElementCategory.ElementAnimeType, _parser.Tokens[prevToken].Content); } @@ -435,4 +458,4 @@ public void BuildElement(Element.ElementCategory category, bool keepDelimiters, } } } -} +} \ No newline at end of file diff --git a/AnitomySharp/ParserNumber.cs b/AnitomySharp/ParserNumber.cs index de3174f..b3f5347 100644 --- a/AnitomySharp/ParserNumber.cs +++ b/AnitomySharp/ParserNumber.cs @@ -513,7 +513,8 @@ public bool MatchJapaneseCounterPattern(string word, Token token) return true; } - regexPattern = @"([第全]?)([0-9一二三四五六七八九十壱弐参]+)([期章話话巻卷幕夜期発縛])"; + // 全角数字:\uFF10-\uFF19 + regexPattern = @"([第全]?)([0-9一二三四五六七八九十壱弐参\uFF10-\uFF19]+)([回集話话幕夜発縛])"; match = Regex.Match(word, RegexMatchOnlyStart + regexPattern + RegexMatchOnlyEnd, RegexOptions.IgnoreCase); if (match.Success) { @@ -522,11 +523,33 @@ public bool MatchJapaneseCounterPattern(string word, Token token) { episodeNumber = ParserHelper.GetNumberFromOrdinal(episodeNumber); } + episodeNumber = ParserHelper.GetNumberFromFullWidth(episodeNumber); + SetEpisodeNumber(episodeNumber, token, false); + return true; + } + regexPattern = @"([第全]?)([0-9一二三四五六七八九十壱弐参\uFF10-\uFF19]+)([期章巻卷])"; + match = Regex.Match(word, RegexMatchOnlyStart + regexPattern + RegexMatchOnlyEnd, RegexOptions.IgnoreCase); + if (match.Success) + { + var episodeNumber = match.Groups[2].Value; + if (!StringHelper.IsNumericString(episodeNumber)) + { + episodeNumber = ParserHelper.GetNumberFromOrdinal(episodeNumber); + } + episodeNumber = ParserHelper.GetNumberFromFullWidth(episodeNumber); SetEpisodeNumber(episodeNumber, token, false); return true; } - regexPattern = @"(vol|EPISODE|ACT|scene|ep|volume|screen|voice|case|menu|rail|round|game|page|collection|cage|office|doll|Princess)([ \.\-_])([0-9]+)"; + regexPattern = @"(EPISODE|ACT|scene|ep|screen|voice|case|menu|rail|round|game|page|collection|cage|office|doll|Princess)([ \.\-_])([0-9]+)"; + match = Regex.Match(word, RegexMatchOnlyStart + regexPattern + RegexMatchOnlyEnd, RegexOptions.IgnoreCase); + if (match.Success) + { + var episodeNumber = match.Groups[3].Value; + SetEpisodeNumber(episodeNumber, token, false); + return true; + } + regexPattern = @"(vol|volume)([ \.\-_])([0-9]+)"; match = Regex.Match(word, RegexMatchOnlyStart + regexPattern + RegexMatchOnlyEnd, RegexOptions.IgnoreCase); if (match.Success) { @@ -874,4 +897,4 @@ public bool SearchForLastNumber(List tokens) return false; } } -} +} \ No newline at end of file diff --git a/Jellyfin.Plugin.MetaShark.Test/ParseNameTest.cs b/Jellyfin.Plugin.MetaShark.Test/ParseNameTest.cs index c8bd63a..83583f6 100644 --- a/Jellyfin.Plugin.MetaShark.Test/ParseNameTest.cs +++ b/Jellyfin.Plugin.MetaShark.Test/ParseNameTest.cs @@ -167,23 +167,36 @@ public void TestTVSeriesParse() [TestMethod] public void TestEposideParse() { + // 普通数字 + var fileName = "03.mp4"; + var parseResult = NameParser.ParseEpisode(fileName); + Assert.AreEqual(parseResult.Name, "03"); + Assert.AreEqual(parseResult.ParentIndexNumber, null); + Assert.AreEqual(parseResult.IndexNumber, 3); + + fileName = "03 4K.mp4"; + parseResult = NameParser.ParseEpisode(fileName); + Assert.AreEqual(parseResult.Name, "03"); + Assert.AreEqual(parseResult.ParentIndexNumber, null); + Assert.AreEqual(parseResult.IndexNumber, 3); + // 混合中英文 - var fileName = "新世界.New.World.2013.BluRay.1080p.x265.10bit.MNHD-FRDS"; - var parseResult = NameParser.Parse(fileName); + fileName = "新世界.New.World.2013.BluRay.1080p.x265.10bit.MNHD-FRDS"; + parseResult = NameParser.ParseEpisode(fileName); Assert.AreEqual(parseResult.ChineseName, "新世界"); Assert.AreEqual(parseResult.Name, "New World"); Assert.AreEqual(parseResult.Year, 2013); // 只英文 S01E01 fileName = "She-Hulk.Attorney.At.Law.S01E01.1080p.WEBRip.x265-RARBG"; - parseResult = NameParser.Parse(fileName); + parseResult = NameParser.ParseEpisode(fileName); Assert.AreEqual(parseResult.Name, "She-Hulk Attorney At Law"); Assert.AreEqual(parseResult.ParentIndexNumber, 1); Assert.AreEqual(parseResult.IndexNumber, 1); // 测试 SXXEPXX 格式 fileName = "神探狄仁杰2 Detective.Dee.Ⅱ.S02EP02.2006.2160p.WEB-DL.x264.AAC-HQC"; - parseResult = NameParser.Parse(fileName); + parseResult = NameParser.ParseEpisode(fileName); Assert.AreEqual(parseResult.ChineseName, "神探狄仁杰2"); Assert.AreEqual(parseResult.Name, "Detective Dee Ⅱ"); Assert.AreEqual(parseResult.ParentIndexNumber, 2); @@ -192,26 +205,26 @@ public void TestEposideParse() // 日文 fileName = "プロポーズ大作戦Ep05_x264.mp4"; - parseResult = NameParser.Parse(fileName); + parseResult = NameParser.ParseEpisode(fileName); Assert.AreEqual(parseResult.Name, "プロポーズ大作戦Ep05"); Assert.AreEqual(parseResult.ParentIndexNumber, null); Assert.AreEqual(parseResult.IndexNumber, 5); fileName = "[01] [ANK-Raws] あっちこっち 01 (BDrip 1920x1080 HEVC-YUV420P10 FLAC)"; - parseResult = NameParser.Parse(fileName); + parseResult = NameParser.ParseEpisode(fileName); Assert.AreEqual(parseResult.Name, "あっちこっち 01"); Assert.AreEqual(parseResult.ParentIndexNumber, null); Assert.AreEqual(parseResult.IndexNumber, 1); // 只中文 fileName = "齊天大聖 第02集"; - parseResult = NameParser.Parse(fileName); - Assert.AreEqual(parseResult.Name, "齊天大聖 第02集"); + parseResult = NameParser.ParseEpisode(fileName); + Assert.AreEqual(parseResult.Name, "齊天大聖"); Assert.AreEqual(parseResult.ParentIndexNumber, null); Assert.AreEqual(parseResult.IndexNumber, 2); fileName = "齊天大聖 第 02 期"; - parseResult = NameParser.Parse(fileName); + parseResult = NameParser.ParseEpisode(fileName); Assert.AreEqual(parseResult.Name, "齊天大聖"); Assert.AreEqual(parseResult.ParentIndexNumber, null); Assert.AreEqual(parseResult.IndexNumber, 2); @@ -219,38 +232,40 @@ public void TestEposideParse() // anime fileName = "[YYDM-11FANS][THERMAE_ROMAE][02][BDRIP][720P][X264-10bit_AAC][7FF2269F]"; - parseResult = NameParser.Parse(fileName); + parseResult = NameParser.ParseEpisode(fileName); Assert.AreEqual(parseResult.Name, "THERMAE ROMAE"); Assert.AreEqual(parseResult.ParentIndexNumber, null); Assert.AreEqual(parseResult.IndexNumber, 2); // anime带季数 fileName = "[WMSUB][Detective Conan - Zero‘s Tea Time ][S01][E06][BIG5][1080P].mp4"; - parseResult = NameParser.Parse(fileName); + parseResult = NameParser.ParseEpisode(fileName); Assert.AreEqual(parseResult.Name, "Detective Conan - Zero‘s Tea Time"); Assert.AreEqual(parseResult.ParentIndexNumber, 1); Assert.AreEqual(parseResult.IndexNumber, 6); fileName = "[KTXP][Machikado_Mazoku_S2][01][BIG5][1080p]"; - parseResult = NameParser.Parse(fileName); + parseResult = NameParser.ParseEpisode(fileName); Assert.AreEqual(parseResult.Name, "Machikado Mazoku"); Assert.AreEqual(parseResult.ParentIndexNumber, null); Assert.AreEqual(parseResult.IndexNumber, 1); fileName = "[異域字幕組][她和她的貓 - Everything Flows -][She and Her Cat - Everything Flows -][01][720p][繁體]"; - parseResult = NameParser.Parse(fileName); + parseResult = NameParser.ParseEpisode(fileName); Assert.AreEqual(parseResult.Name, "她和她的貓 - Everything Flows"); Assert.AreEqual(parseResult.ParentIndexNumber, null); Assert.AreEqual(parseResult.IndexNumber, 1); // anime特典 fileName = "[KissSub][Steins;Gate][SP][GB_BIG5_JP][BDrip][1080P][HEVC] 边界曲面的缺失之环"; - parseResult = NameParser.Parse(fileName); + parseResult = NameParser.ParseEpisode(fileName); Assert.IsTrue(parseResult.IsSpecial); Assert.AreEqual(parseResult.Name, "边界曲面的缺失之环"); Assert.AreEqual(parseResult.ParentIndexNumber, null); Assert.AreEqual(parseResult.IndexNumber, null); + + } diff --git a/Jellyfin.Plugin.MetaShark/Core/NameParser.cs b/Jellyfin.Plugin.MetaShark/Core/NameParser.cs index e8bf797..394bf4b 100644 --- a/Jellyfin.Plugin.MetaShark/Core/NameParser.cs +++ b/Jellyfin.Plugin.MetaShark/Core/NameParser.cs @@ -108,7 +108,7 @@ public static ParseNameResult Parse(string fileName, bool isEpisode = false) } } - // 假如Anitomy解析不到year,尝试使用jellyfin默认parser,看能不能解析成功 + // 假如 Anitomy 解析不到 year,尝试使用 jellyfin 默认 parser,看能不能解析成功 if (parseResult.Year == null && !isAnime) { var nativeParseResult = ParseMovieByDefault(fileName); @@ -118,13 +118,22 @@ public static ParseNameResult Parse(string fileName, bool isEpisode = false) } } + // 假如 Anitomy 解析不到集数,判断 name 是否是数字集号 + if (parseResult.IndexNumber is null && isEpisode) + { + if (!string.IsNullOrEmpty(parseResult.Name) && parseResult.Name.IsNumericString()) + { + parseResult.IndexNumber = parseResult.Name.ToInt(); + } + } + // 修复纯中文集数/特殊标识集数 if (parseResult.IndexNumber is null) { parseResult.IndexNumber = ParseChineseOrSpecialIndexNumber(fileName); } - // 解析不到title时,或解析出多个title时,使用默认名 + // 解析不到 title 时,或解析出多个 title 时,使用默认名 if (string.IsNullOrEmpty(parseResult.Name)) { parseResult.Name = fileName; @@ -133,6 +142,11 @@ public static ParseNameResult Parse(string fileName, bool isEpisode = false) return parseResult; } + public static ParseNameResult ParseEpisode(string fileName) + { + return Parse(fileName, true); + } + private static string CleanName(string name) { // 电视剧名称后紧跟季信息时,会附加到名称中,需要去掉 @@ -173,9 +187,11 @@ public static ParseNameResult ParseMovieByDefault(string fileName) /// public static EpisodePathParserResult ParseEpisodeByDefault(string fileName) { + // EpisodePathParser需要路径信息, 这里添加一个分隔符模拟路径 + var path = Path.DirectorySeparatorChar + fileName; var nameOptions = new Emby.Naming.Common.NamingOptions(); return new EpisodePathParser(nameOptions) - .Parse(fileName, false); + .Parse(path, false); } diff --git a/Jellyfin.Plugin.MetaShark/Core/StringExtension.cs b/Jellyfin.Plugin.MetaShark/Core/StringExtension.cs index f4cb7c0..21f6bf8 100644 --- a/Jellyfin.Plugin.MetaShark/Core/StringExtension.cs +++ b/Jellyfin.Plugin.MetaShark/Core/StringExtension.cs @@ -78,5 +78,10 @@ public static string GetMatchGroup(this string text, Regex reg) return string.Empty; } + + public static bool IsNumericString(this string str) + { + return str.All(char.IsDigit); + } } } diff --git a/Jellyfin.Plugin.MetaShark/Providers/EpisodeProvider.cs b/Jellyfin.Plugin.MetaShark/Providers/EpisodeProvider.cs index 734dc96..b22ad44 100644 --- a/Jellyfin.Plugin.MetaShark/Providers/EpisodeProvider.cs +++ b/Jellyfin.Plugin.MetaShark/Providers/EpisodeProvider.cs @@ -135,7 +135,7 @@ public EpisodeInfo FixParseInfo(EpisodeInfo info) { // 使用AnitomySharp进行重新解析,解决anime识别错误 var fileName = Path.GetFileNameWithoutExtension(info.Path) ?? info.Name; - var parseResult = NameParser.Parse(fileName); + var parseResult = NameParser.ParseEpisode(fileName); info.Year = parseResult.Year; info.Name = parseResult.ChineseName ?? parseResult.Name; @@ -192,14 +192,14 @@ public EpisodeInfo FixParseInfo(EpisodeInfo info) // info.ParentIndexNumber = 1; // } - // 特典优先使用文件名(特典除了前面特别设置,还有SXX/Season XX等默认的) + // 特典优先使用文件名(特典除了前面特别设置,还有 SXX/Season XX 等默认的) if (info.ParentIndexNumber.HasValue && info.ParentIndexNumber == 0) { info.Name = parseResult.SpecialName == info.Name ? fileName : parseResult.SpecialName; } - // 大于1000,可能错误解析了分辨率 - if (parseResult.IndexNumber.HasValue && parseResult.IndexNumber < 1000 && info.IndexNumber != parseResult.IndexNumber) + // 修正 episode number + if (parseResult.IndexNumber.HasValue && info.IndexNumber != parseResult.IndexNumber) { this.Log("FixEpisodeNumber by anitomy. old: {0} new: {1}", info.IndexNumber, parseResult.IndexNumber); info.IndexNumber = parseResult.IndexNumber; @@ -214,7 +214,7 @@ public EpisodeInfo FixParseInfo(EpisodeInfo info) { // 特典或extra视频可能和正片剧集放在同一目录 var fileName = Path.GetFileNameWithoutExtension(info.Path) ?? info.Name; - var parseResult = NameParser.Parse(fileName); + var parseResult = NameParser.ParseEpisode(fileName); if (parseResult.IsExtra) { this.Log($"Found anime extra of [name]: {fileName}");