From e397038ddca01ad199c3e761fdf418c7ab9ca148 Mon Sep 17 00:00:00 2001
From: Kaj Koivunen <kalakoiv@jyu.fi>
Date: Mon, 13 Mar 2023 18:53:38 +0200
Subject: [PATCH] =?UTF-8?q?lis=C3=A4=C3=A4=20testej=C3=A4?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 src/main/kotlin/DictionaryParser.kt     |  13 ++-
 src/test/kotlin/DictionaryParserTest.kt | 115 ++++++++++++++++++++++++
 2 files changed, 125 insertions(+), 3 deletions(-)
 create mode 100644 src/test/kotlin/DictionaryParserTest.kt

diff --git a/src/main/kotlin/DictionaryParser.kt b/src/main/kotlin/DictionaryParser.kt
index 1b97020..a10bf0d 100644
--- a/src/main/kotlin/DictionaryParser.kt
+++ b/src/main/kotlin/DictionaryParser.kt
@@ -2,6 +2,7 @@ package guru.kake.ronove
 
 import guru.kake.xmlp.XMLParser
 import java.io.FileInputStream
+import java.io.InputStream
 import javax.xml.stream.XMLEventReader
 import javax.xml.stream.XMLInputFactory
 import javax.xml.stream.events.XMLEvent
@@ -73,15 +74,21 @@ class DictionaryParser private constructor() {
         private val xmlInputFactory: XMLInputFactory = XMLInputFactory.newInstance()
 
         /**
-         * Parses Kanjidic2 as provided by EDRDG
+         * Parses Kanjidic2 as provided by EDRDG from the file at [path]; the file stream is closed afterwards
          * See https://www.edrdg.org/wiki/index.php/KANJIDIC_Project
          */
-        fun parseKanjidic2(path: String): List<Character> {
+        fun parseKanjidic2(path: String): List<Character> = FileInputStream(path).use { parseKanjidic2(it) }
+
+        /**
+         * Parses Kanjidic2 as provided by EDRDG from any [InputStream]
+         * See https://www.edrdg.org/wiki/index.php/KANJIDIC_Project
+         */
+        fun parseKanjidic2(stream: InputStream) : List<Character> {
             val parsed: MutableList<Character> = mutableListOf()
             var character: Character? = null
             var rmgroup: RMGroup? = null
 
-            XMLParser.parse(path) {
+            XMLParser.parse(stream) {
                 element("character") {
                     start { character = Character() }
                     end {
diff --git a/src/test/kotlin/DictionaryParserTest.kt b/src/test/kotlin/DictionaryParserTest.kt
new file mode 100644
index 0000000..87810fa
--- /dev/null
+++ b/src/test/kotlin/DictionaryParserTest.kt
@@ -0,0 +1,115 @@
+import guru.kake.ronove.DictionaryParser.Companion.parseKanjidic2
+import guru.kake.ronove.RMGroup
+import guru.kake.ronove.Sense
+import org.junit.jupiter.api.Test
+import kotlin.test.assertEquals
+
+/** Tests for the Character/Phrase value semantics and for parsing a Kanjidic2 entry from a stream. */
+class DictionaryParserTest {
+    /** Same literal means equal with equal hash, even if rmgroups differ; expected order is 犬 < 猫 < 鳥. */
+    @Test fun testCharacter() {
+        val c1 = guru.kake.ronove.Character("猫")
+        val c2 = guru.kake.ronove.Character("猫")
+        c2.rmgroups.add(RMGroup()) // extra reading/meaning data must not affect equality or hashing
+        val c3 = guru.kake.ronove.Character("犬")
+        val c4 = guru.kake.ronove.Character("鳥")
+        assertEquals(c1, c2)
+        assert(c1 != c3)
+        assert(!c1.equals("cat")) // never equal to an object of another type
+        assert(!c1.equals(null))
+        assert(c1 > c3)
+        assert(c1 < c4)
+        assertEquals(c1.hashCode(), c2.hashCode())
+        assert(c1.hashCode() != c3.hashCode())
+    }
+
+    /** Phrases built from the same string are equal with equal hash, even if senses differ; いぬ < ねこ < わたし. */
+    @Test fun testPhrase() {
+        val p1 = guru.kake.ronove.Phrase("ねこ")
+        val p2 = guru.kake.ronove.Phrase("ねこ")
+        p2.senses.add(Sense()) // extra sense data must not affect equality or hashing
+        val p3 = guru.kake.ronove.Phrase("いぬ")
+        val p4 = guru.kake.ronove.Phrase("わたし")
+        assertEquals(p1, p2)
+        assert(p1 != p3)
+        assert(!p1.equals("cat")) // never equal to an object of another type
+        assert(!p1.equals(null))
+        assert(p1 > p3)
+        assert(p1 < p4)
+        assertEquals(p1.hashCode(), p2.hashCode())
+        assert(p1.hashCode() != p3.hashCode())
+    }
+
+    @Test fun testParseKanjidic2() {
+        val result = parseKanjidic2(kanjidic2sample.byteInputStream()) // stream overload: parse the in-memory sample
+        assertEquals("猫", result[0].literal)
+        assertEquals("ビョウ", result[0].rmgroups[0].reading.find { it.first == "ja_on" }?.second)
+        assertEquals("ねこ", result[0].rmgroups[0].reading.find { it.first == "ja_kun" }?.second)
+        assertEquals("cat", result[0].rmgroups[0].meaning.find { it.first == null }?.second) // no m_lang => null key
+        assertEquals("gato", result[0].rmgroups[0].meaning.find { it.first == "es" }?.second)
+    }
+
+    companion object { // fixture: a single Kanjidic2 <character> entry (猫) fed to the parser
+        private val kanjidic2sample = """
+            <!-- Entry for Kanji: 猫 -->
+            <character>
+            <literal>猫</literal>
+            <codepoint>
+            <cp_value cp_type="ucs">732b</cp_value>
+            <cp_value cp_type="jis208">1-39-13</cp_value>
+            </codepoint>
+            <radical>
+            <rad_value rad_type="classical">94</rad_value>
+            </radical>
+            <misc>
+            <grade>8</grade>
+            <stroke_count>11</stroke_count>
+            <variant var_type="jis212">1-63-05</variant>
+            <freq>1702</freq>
+            <jlpt>2</jlpt>
+            </misc>
+            <dic_number>
+            <dic_ref dr_type="nelson_c">2893</dic_ref>
+            <dic_ref dr_type="nelson_n">3586</dic_ref>
+            <dic_ref dr_type="halpern_njecd">535</dic_ref>
+            <dic_ref dr_type="halpern_kkd">651</dic_ref>
+            <dic_ref dr_type="halpern_kkld">391</dic_ref>
+            <dic_ref dr_type="halpern_kkld_2ed">488</dic_ref>
+            <dic_ref dr_type="heisig">244</dic_ref>
+            <dic_ref dr_type="heisig6">259</dic_ref>
+            <dic_ref dr_type="gakken">1763</dic_ref>
+            <dic_ref dr_type="oneill_names">1304</dic_ref>
+            <dic_ref dr_type="moro" m_vol="7" m_page="0719">20535X</dic_ref>
+            <dic_ref dr_type="henshall">1742</dic_ref>
+            <dic_ref dr_type="sh_kk">1470</dic_ref>
+            <dic_ref dr_type="sh_kk2">1567</dic_ref>
+            <dic_ref dr_type="jf_cards">730</dic_ref>
+            <dic_ref dr_type="tutt_cards">1461</dic_ref>
+            <dic_ref dr_type="kanji_in_context">1410</dic_ref>
+            <dic_ref dr_type="kodansha_compact">1304</dic_ref>
+            <dic_ref dr_type="maniette">250</dic_ref>
+            </dic_number>
+            <query_code>
+            <q_code qc_type="skip">1-3-8</q_code>
+            <q_code qc_type="sh_desc">3g8.5</q_code>
+            <q_code qc_type="four_corner">4426.0</q_code>
+            <q_code qc_type="deroo">2976</q_code>
+            </query_code>
+            <reading_meaning>
+            <rmgroup>
+            <reading r_type="pinyin">mao1</reading>
+            <reading r_type="pinyin">mao2</reading>
+            <reading r_type="korean_r">myo</reading>
+            <reading r_type="korean_h">묘</reading>
+            <reading r_type="vietnam">Miêu</reading>
+            <reading r_type="ja_on">ビョウ</reading>
+            <reading r_type="ja_kun">ねこ</reading>
+            <meaning>cat</meaning>
+            <meaning m_lang="fr">chat</meaning>
+            <meaning m_lang="es">gato</meaning>
+            <meaning m_lang="pt">Gato</meaning>
+            </rmgroup>
+            </reading_meaning>
+            </character>""".trimIndent()
+    }
+}
\ No newline at end of file
-- 
GitLab