OSDN Git Service

TweetExtractorの返すエンティティのインデックスがサロゲートペアを考慮できていない不具合を修正
author Kimura Youichi <kim.upsilon@bucyou.net>
Sat, 13 Oct 2018 12:13:43 +0000 (21:13 +0900)
committer Kimura Youichi <kim.upsilon@bucyou.net>
Sat, 13 Oct 2018 12:22:56 +0000 (21:22 +0900)
テストケースに「✨」を使用していたが、これは U+2728 で基本多言語面にある文字なのでサロゲートペアのテストとして適切ではなかった

Fixes: 33d5a834 ("テキストからURLを抽出してTwitterEntityUrlとして出力するExtractUrlEntitiesメソッドを追加")

OpenTween.Tests/ExtensionsTest.cs
OpenTween.Tests/TweetExtractorTest.cs
OpenTween/Extensions.cs
OpenTween/Resources/ChangeLog.txt
OpenTween/TweetExtractor.cs

index c7d07c9..ead27b4 100644 (file)
@@ -88,6 +88,26 @@ namespace OpenTween
             Assert.Throws<ArgumentOutOfRangeException>(() => "a".GetCodepointAtSafe(1));
         }
 
+        // Verifies code point counting over a UTF-16 index range; the emoji in the
+        // sample text occupies two chars (one surrogate pair) at indices 6 and 7.
+        [Theory]
+        [InlineData("", 0, 0, 0)]
+        [InlineData("sushi 🍣", 0, 8, 7)]
+        [InlineData("sushi 🍣", 0, 5, 5)]
+        [InlineData("sushi 🍣", 6, 8, 1)]
+        [InlineData("sushi 🍣", 6, 7, 1)] // range cut inside the surrogate pair (low surrogate excluded)
+        [InlineData("sushi 🍣", 7, 8, 1)] // range cut inside the surrogate pair (high surrogate excluded)
+        public void GetCodepointCount_Test(string str, int start, int end, int expected)
+        {
+            var actual = str.GetCodepointCount(start, end);
+
+            Assert.Equal(expected, actual);
+        }
+
+        [Fact]
+        public void GetCodepointCount_ErrorTest()
+        {
+            string nullText = null;
+            const string text = "abc";
+
+            // a null receiver is rejected
+            Assert.Throws<ArgumentNullException>(() => nullText.GetCodepointCount(0, 0));
+
+            // negative start
+            Assert.Throws<ArgumentOutOfRangeException>(() => text.GetCodepointCount(-1, 3));
+            // end beyond the string length
+            Assert.Throws<ArgumentOutOfRangeException>(() => text.GetCodepointCount(0, 4));
+            // start beyond the string length
+            Assert.Throws<ArgumentOutOfRangeException>(() => text.GetCodepointCount(4, 5));
+            // start greater than end
+            Assert.Throws<ArgumentOutOfRangeException>(() => text.GetCodepointCount(2, 1));
+        }
+
         [Fact]
         public async Task ForEachAsync_Test()
         {
index 15c58ec..6b0f37a 100644 (file)
@@ -78,7 +78,7 @@ namespace OpenTween
         [Fact]
         public void ExtractUrlEntities_SurrogatePairTest()
         {
-            var entity = TweetExtractor.ExtractUrlEntities("✨ http://example.com/ ✨").Single();
+            var entity = TweetExtractor.ExtractUrlEntities("🍣 http://example.com/ 🍣").Single();
 
             Assert.Equal(new[] { 2, 21 }, entity.Indices);
             Assert.Equal("http://example.com/", entity.Url);
@@ -145,6 +145,15 @@ namespace OpenTween
         }
 
         [Fact]
+        public void ExtractMentionEntities_SurrogatePairTest()
+        {
+            var entity = TweetExtractor.ExtractMentionEntities("🍣 @twitterapi").Single();
+
+            Assert.Equal(new[] { 2, 13 }, entity.Indices);
+            Assert.Equal("twitterapi", entity.ScreenName);
+        }
+
+        [Fact]
         public void ExtractHashtagEntities_Test()
         {
             var entity = TweetExtractor.ExtractHashtagEntities("hogehoge #test").Single();
@@ -165,5 +174,14 @@ namespace OpenTween
             Assert.Equal(new[] { 15, 21 }, entities[1].Indices);
             Assert.Equal("test2", entities[1].Text);
         }
+
+        [Fact]
+        public void ExtractHashtagEntities_SurrogatePairTest()
+        {
+            // The leading emoji takes two UTF-16 chars; the expected indices below
+            // count it as a single position.
+            var entities = TweetExtractor.ExtractHashtagEntities("🍣 #sushi");
+            var entity = entities.Single();
+
+            var expectedIndices = new[] { 2, 8 };
+            Assert.Equal(expectedIndices, entity.Indices);
+            Assert.Equal("sushi", entity.Text);
+        }
     }
 }
index a854c2e..63329ff 100644 (file)
@@ -86,6 +86,30 @@ namespace OpenTween
             return s[index];
         }
 
+        /// <summary>
+        /// Counts the Unicode code points in the UTF-16 range [start, end) of a string.
+        /// </summary>
+        /// <remarks>
+        /// A surrogate pair counts as one code point. When the range boundary splits a pair,
+        /// the half that lies inside the range is still counted as one.
+        /// </remarks>
+        /// <param name="s">The string to inspect.</param>
+        /// <param name="start">Index of the first char of the range (inclusive).</param>
+        /// <param name="end">Index just past the last char of the range (exclusive).</param>
+        /// <returns>The number of code points in the range; 0 when the range is empty.</returns>
+        /// <exception cref="ArgumentNullException"><paramref name="s"/> is null.</exception>
+        /// <exception cref="ArgumentOutOfRangeException">
+        /// <paramref name="start"/> or <paramref name="end"/> lies outside the string,
+        /// or <paramref name="start"/> is greater than <paramref name="end"/>.
+        /// </exception>
+        public static int GetCodepointCount(this string s, int start, int end)
+        {
+            if (s == null)
+            {
+                throw new ArgumentNullException(nameof(s));
+            }
+
+            var length = s.Length;
+            if (start < 0 || length < start)
+            {
+                throw new ArgumentOutOfRangeException(nameof(start));
+            }
+
+            if (end < 0 || length < end)
+            {
+                throw new ArgumentOutOfRangeException(nameof(end));
+            }
+
+            if (end < start)
+            {
+                throw new ArgumentOutOfRangeException(nameof(start));
+            }
+
+            var codepoints = 0;
+            var position = start;
+            while (position < end)
+            {
+                // Step over both halves of a surrogate pair at once so it is counted only once.
+                position += char.IsSurrogatePair(s, position) ? 2 : 1;
+                codepoints++;
+            }
+
+            return codepoints;
+        }
+
+
         public static Task ForEachAsync<T>(this IObservable<T> observable, Action<T> subscriber)
             => ForEachAsync(observable, value => { subscriber(value); return Task.CompletedTask; });
 
index 8e67bad..0d26d65 100644 (file)
@@ -1,6 +1,7 @@
 更新履歴
 
 ==== Ver 2.1.3-dev(2018/xx/xx)
+ * FIX: ユーザー情報ダイアログでbioに絵文字を含む場合にハッシュタグやメンションのリンク範囲がずれる不具合を修正
 
 ==== Ver 2.1.2(2018/09/30)
  * CHG: 投稿時取得の設定がオフでも、投稿した内容を即時タイムラインに反映させるようにしました
index 1bbdca4..b2af7a9 100644 (file)
@@ -83,8 +83,9 @@ namespace OpenTween
 
                 if (validUrl)
                 {
-                    var startPos = m.Groups["url"].Index;
-                    var endPos = startPos + m.Groups["url"].Length;
+                    var urlGroup = m.Groups["url"];
+                    var startPos = text.GetCodepointCount(0, urlGroup.Index);
+                    var endPos = startPos + text.GetCodepointCount(urlGroup.Index, urlGroup.Index + urlGroup.Length);
 
                     yield return new TwitterEntityUrl
                     {
@@ -106,8 +107,8 @@ namespace OpenTween
             var matchesAtList = Regex.Matches(text, @"(?<=^|[^a-zA-Z0-9_/])([@@][a-zA-Z0-9_]{1,20}/[a-zA-Z][a-zA-Z0-9\p{IsLatin-1Supplement}\-]{0,79})");
             foreach (var match in matchesAtList.Cast<Match>())
             {
-                var startPos = match.Index;
-                var endPos = startPos + match.Length;
+                var startPos = text.GetCodepointCount(0, match.Index);
+                var endPos = startPos + text.GetCodepointCount(match.Index, match.Index + match.Length);
 
                 yield return new TwitterEntityMention
                 {
@@ -120,8 +121,8 @@ namespace OpenTween
             var matchesAtUser = Regex.Matches(text, "(?<=^|[^a-zA-Z0-9_/])([@@][a-zA-Z0-9_]{1,20})(?=[^a-zA-Z0-9_/]|$)");
             foreach (var match in matchesAtUser.Cast<Match>())
             {
-                var startPos = match.Index;
-                var endPos = startPos + match.Length;
+                var startPos = text.GetCodepointCount(0, match.Index);
+                var endPos = startPos + text.GetCodepointCount(match.Index, match.Index + match.Length);
 
                 yield return new TwitterEntityMention
                 {
@@ -141,8 +142,8 @@ namespace OpenTween
             {
                 var groupHashtagSharp = match.Groups[2];
                 var groupHashtagText = match.Groups[3];
-                var startPos = groupHashtagSharp.Index;
-                var endPos = startPos + groupHashtagSharp.Length + groupHashtagText.Length;
+                var startPos = text.GetCodepointCount(0, groupHashtagSharp.Index);
+                var endPos = startPos + text.GetCodepointCount(groupHashtagSharp.Index, groupHashtagSharp.Index + groupHashtagSharp.Length + groupHashtagText.Length);
 
                 yield return new TwitterEntityHashtag
                 {