From e397038ddca01ad199c3e761fdf418c7ab9ca148 Mon Sep 17 00:00:00 2001 From: Kaj Koivunen <kalakoiv@jyu.fi> Date: Mon, 13 Mar 2023 18:53:38 +0200 Subject: [PATCH] =?UTF-8?q?lis=C3=A4=C3=A4=20testej=C3=A4?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/main/kotlin/DictionaryParser.kt | 13 ++- src/test/kotlin/DictionaryParserTest.kt | 115 ++++++++++++++++++++++++ 2 files changed, 125 insertions(+), 3 deletions(-) create mode 100644 src/test/kotlin/DictionaryParserTest.kt diff --git a/src/main/kotlin/DictionaryParser.kt b/src/main/kotlin/DictionaryParser.kt index 1b97020..a10bf0d 100644 --- a/src/main/kotlin/DictionaryParser.kt +++ b/src/main/kotlin/DictionaryParser.kt @@ -2,6 +2,7 @@ package guru.kake.ronove import guru.kake.xmlp.XMLParser import java.io.FileInputStream +import java.io.InputStream import javax.xml.stream.XMLEventReader import javax.xml.stream.XMLInputFactory import javax.xml.stream.events.XMLEvent @@ -73,15 +74,21 @@ class DictionaryParser private constructor() { private val xmlInputFactory: XMLInputFactory = XMLInputFactory.newInstance() /** - * Parses Kanjidic2 as provided by EDRDG + * Parses Kanjidic2 as provided by EDRDG from a file found in path * See https://www.edrdg.org/wiki/index.php/KANJIDIC_Project */ - fun parseKanjidic2(path: String): List<Character> { + fun parseKanjidic2(path: String): List<Character> = parseKanjidic2(FileInputStream(path)) + + /** + * Parses Kanjidic2 as provided by EDRDG from any [InputStream] + * See https://www.edrdg.org/wiki/index.php/KANJIDIC_Project + */ + fun parseKanjidic2(stream: InputStream) : List<Character> { val parsed: MutableList<Character> = mutableListOf() var character: Character? = null var rmgroup: RMGroup? 
= null - XMLParser.parse(path) { + XMLParser.parse(stream) { element("character") { start { character = Character() } end { diff --git a/src/test/kotlin/DictionaryParserTest.kt b/src/test/kotlin/DictionaryParserTest.kt new file mode 100644 index 0000000..87810fa --- /dev/null +++ b/src/test/kotlin/DictionaryParserTest.kt @@ -0,0 +1,115 @@ +import guru.kake.ronove.DictionaryParser.Companion.parseKanjidic2 +import guru.kake.ronove.RMGroup +import guru.kake.ronove.Sense +import org.junit.jupiter.api.Test +import kotlin.test.assertEquals + +class DictionaryParserTest { + + @Test fun testCharacter() { + val c1 = guru.kake.ronove.Character("猫") + val c2 = guru.kake.ronove.Character("猫") + c2.rmgroups.add(RMGroup()) + val c3 = guru.kake.ronove.Character("犬") + val c4 = guru.kake.ronove.Character("鳥") + val s = "cat" + assert(c1 == c2) + assert(c1 != c3) + assert(!c1.equals(s)) + assert(!c1.equals(null)) + assert(c1 > c3) + assert(c1 < c4) + assert(c1.hashCode() == c2.hashCode()) + assert(c1.hashCode() != c3.hashCode()) + } + + @Test fun testPhrase() { + val p1 = guru.kake.ronove.Phrase("ねこ") + val p2 = guru.kake.ronove.Phrase("ねこ") + p2.senses.add(Sense()) + val p3 = guru.kake.ronove.Phrase("いぬ") + val p4 = guru.kake.ronove.Phrase("わたし") + val s = "cat" + assert(p1 == p2) + assert(p1 != p3) + assert(!p1.equals(s)) + assert(!p1.equals(null)) + assert(p1 > p3) + assert(p1 < p4) + assert(p1.hashCode() == p2.hashCode()) + assert(p1.hashCode() != p3.hashCode()) + } + + @Test fun testParseKanjidic2() { + val result = parseKanjidic2(kanjidic2sample.byteInputStream()) + assertEquals("猫", result[0].literal) + assertEquals("ビョウ", result[0].rmgroups[0].reading.find { it.first == "ja_on" }?.second) + assertEquals("ねこ", result[0].rmgroups[0].reading.find { it.first == "ja_kun" }?.second) + assertEquals("cat", result[0].rmgroups[0].meaning.find { it.first == null }?.second) + assertEquals("gato", result[0].rmgroups[0].meaning.find { it.first == "es" }?.second) + } + + 
companion object { + private val kanjidic2sample = + "<!-- Entry for Kanji: 猫 -->\n" + + "<character>\n" + + "<literal>猫</literal>\n" + + "<codepoint>\n" + + "<cp_value cp_type=\"ucs\">732b</cp_value>\n" + + "<cp_value cp_type=\"jis208\">1-39-13</cp_value>\n" + + "</codepoint>\n" + + "<radical>\n" + + "<rad_value rad_type=\"classical\">94</rad_value>\n" + + "</radical>\n" + + "<misc>\n" + + "<grade>8</grade>\n" + + "<stroke_count>11</stroke_count>\n" + + "<variant var_type=\"jis212\">1-63-05</variant>\n" + + "<freq>1702</freq>\n" + + "<jlpt>2</jlpt>\n" + + "</misc>\n" + + "<dic_number>\n" + + "<dic_ref dr_type=\"nelson_c\">2893</dic_ref>\n" + + "<dic_ref dr_type=\"nelson_n\">3586</dic_ref>\n" + + "<dic_ref dr_type=\"halpern_njecd\">535</dic_ref>\n" + + "<dic_ref dr_type=\"halpern_kkd\">651</dic_ref>\n" + + "<dic_ref dr_type=\"halpern_kkld\">391</dic_ref>\n" + + "<dic_ref dr_type=\"halpern_kkld_2ed\">488</dic_ref>\n" + + "<dic_ref dr_type=\"heisig\">244</dic_ref>\n" + + "<dic_ref dr_type=\"heisig6\">259</dic_ref>\n" + + "<dic_ref dr_type=\"gakken\">1763</dic_ref>\n" + + "<dic_ref dr_type=\"oneill_names\">1304</dic_ref>\n" + + "<dic_ref dr_type=\"moro\" m_vol=\"7\" m_page=\"0719\">20535X</dic_ref>\n" + + "<dic_ref dr_type=\"henshall\">1742</dic_ref>\n" + + "<dic_ref dr_type=\"sh_kk\">1470</dic_ref>\n" + + "<dic_ref dr_type=\"sh_kk2\">1567</dic_ref>\n" + + "<dic_ref dr_type=\"jf_cards\">730</dic_ref>\n" + + "<dic_ref dr_type=\"tutt_cards\">1461</dic_ref>\n" + + "<dic_ref dr_type=\"kanji_in_context\">1410</dic_ref>\n" + + "<dic_ref dr_type=\"kodansha_compact\">1304</dic_ref>\n" + + "<dic_ref dr_type=\"maniette\">250</dic_ref>\n" + + "</dic_number>\n" + + "<query_code>\n" + + "<q_code qc_type=\"skip\">1-3-8</q_code>\n" + + "<q_code qc_type=\"sh_desc\">3g8.5</q_code>\n" + + "<q_code qc_type=\"four_corner\">4426.0</q_code>\n" + + "<q_code qc_type=\"deroo\">2976</q_code>\n" + + "</query_code>\n" + + "<reading_meaning>\n" + + "<rmgroup>\n" + + "<reading 
r_type=\"pinyin\">mao1</reading>\n" + + "<reading r_type=\"pinyin\">mao2</reading>\n" + + "<reading r_type=\"korean_r\">myo</reading>\n" + + "<reading r_type=\"korean_h\">묘</reading>\n" + + "<reading r_type=\"vietnam\">Miêu</reading>\n" + + "<reading r_type=\"ja_on\">ビョウ</reading>\n" + + "<reading r_type=\"ja_kun\">ねこ</reading>\n" + + "<meaning>cat</meaning>\n" + + "<meaning m_lang=\"fr\">chat</meaning>\n" + + "<meaning m_lang=\"es\">gato</meaning>\n" + + "<meaning m_lang=\"pt\">Gato</meaning>\n" + + "</rmgroup>\n" + + "</reading_meaning>\n" + + "</character>" + } +} \ No newline at end of file -- GitLab