From 24d2a13242fced9a7caa95475f9e870b511d6874 Mon Sep 17 00:00:00 2001 From: planggard <63806188+planggard@users.noreply.github.com> Date: Wed, 30 Aug 2023 17:31:37 +0200 Subject: [PATCH] Problemer med LIRSUR --- src/cg3/disambiguator.cg3 | 24 ++++++++++++-------- src/fst/affixes/derivations-inflections.lexc | 10 ++++---- src/fst/stems/nouns.lexc | 18 +++++++-------- src/fst/stems/verbs.lexc | 4 ++-- 4 files changed, 29 insertions(+), 27 deletions(-) diff --git a/src/cg3/disambiguator.cg3 b/src/cg3/disambiguator.cg3 index 16b8c2051..ef477d551 100644 --- a/src/cg3/disambiguator.cg3 +++ b/src/cg3/disambiguator.cg3 @@ -1707,7 +1707,7 @@ LIST = (/"paare"\ Gram/TV\ HTR\ Der/vv\ Gram/IV\ TUQ/l) "politeeq" (/"qamip"\ Gram/TV\ TIR\ Der/vv\ Gram/TV\ HTR\ Der/vv\ Gram/IV\ TAR\ Der/vv\ Gram/IV\ TUQ/l) -"qatserisartoq" +(/"qatser"\ Gram/TV\ HTR\ Der/vv\ Gram/IV\ TAR\ Der/vv\ \(Gram/IV\ \)?TUQ/l) "professor" "sakkutooq" (/"saqi"\ Gram/IV\ TUQ/l)("siulittar" HTR TUQ) @@ -12715,15 +12715,17 @@ REMOVE:Int0195B Prop + ("Kangerluk") IF (0 ("kangerluk") LINK *1 (/"qinngoq"\ PA REMOVE:Int0195C ("Qaammat") + Prop IF (0 ("qaammat")); #usandsynligt, lav undtagelser, når de kommer. -REMOVE:2221 N - Prop - iProp - Der/vv - ANIMAL IF (0 ("<\\p{Lu}.*>"r) LINK NOT 0 Orth/Arch)(*0C ("<\\p{Lu}.*>"r) - (Hyb/2-2))(NEGATE 0 FAM_UNIK OR TITEL OR HUM OR WEATHER OR NON-PROP)(NEGATE 0 Rel)(NEGATE 0 V)(NEGATE 0 ("ateq") LINK 1 Prop)# Select Prop hvis Wordform starter med stort, og der også er et andet Prop i sætningen. +REMOVE:2221A N - Prop - iProp - Der/vv - ANIMAL IF (0 ("<\\p{Lu}.*>"r) LINK NOT 0 Orth/Arch)(*0C ("<\\p{Lu}.*>"r) - (Hyb/2-2))(NEGATE 0 FAM_UNIK OR TITEL OR HUM OR WEATHER OR NON-PROP)(NEGATE 0 Rel)(NEGATE 0 V)(NEGATE 0 ("ateq") LINK 1 Prop)# Select Prop hvis Wordform starter med stort, og der også er et andet Prop i sætningen. (NEGATE 0 (/.*neq>"/r)) #Saassussineq (NEGATE 0 @SUBJ> LINK *0C ("<\\p{Lu}.*>"r) - @SUBJ> BARRIER @SUBJ>)#Må ikke vælge stochastisk Prop hvis appellativet ikke indgår syntagmatisk med det betingende Prop: PIVIUSULERSAARUT Nuka Bisgaardimut tunngasoq Space Rocket Nationimit suliarineqaleruttorpoq. (NEGATE 0 Orth/Copy LINK 0 N)#Politiit oqaaseqartartuata oqarnera naapertorlugu innuttaasut qimagussorniarneqarnerminni COVID-19-IMIK tunillatsinnissaminnik ernumallutik qimagussorneqarusussimanngikkaluarput. (NEGATE 0 ("Aggusti") + Prop + Lok LINK 0 MAANED) #AGGUSTIMILI Hjørringimi najugaqarpunga (NEGATE 0 ("Sofar")) #sofa (NEGATE 0 ("Tim")) #timi +(NEGATE -1 BOS LINK 1 _TARGET_ + &&IV_SUBJ& LINK *1 V + &&IV_SUBJ& BARRIER V)#NIPILERSOQATIGIISSUIMMI 18. septembarimi Katuami tusarnaartitsissapput, illoqarfiit pingaarnersaannit innuttaasut nipilersoqatigiissuit qaqutigoortumik tusarnaariarsinnaavaat. ; -#X@X + +REMOVE:2221B (/TUQ\ Der/vn\ VIK\ Der/nn\ N/l) IF (0 (/SUAQ\ Der/nn\ N/l))(-1 BOS LINK 1 _TARGET_ + &&IV_SUBJ& LINK *1 V + &&IV_SUBJ& BARRIER V); SELECT:2222 Prop + Lok IF (-1 Lok) ; @@ -13944,15 +13946,15 @@ REMOVE:8563F (/Gram/TV\ V\ Ind\ 3Pl\ 3PlO/l) IF (0 (/Gram/TV\ V\ Ind\ 3Pl\ 3SgO/ # ======================================================================= # # Disambiguering af enkeltord, der ikke kan disambigueres paa anden maade # ##### Nomen eller nomen 13863 -##### Nomen eller pron 14139 +##### Nomen eller pron 14240 ##### Pron eller pron 14146 ##### Nomen eller proprium 14150 ##### Nomen eller partikel 14158 ##### Nomen eller numerale 14271 ##### Nomen eller verbum 14314 ##### Verbum eller verbum 14389 -##### Enkelte derivativer 14884 -##### Enkelte substantiver 15042 +##### Enkelte derivativer 14890 +##### Enkelte substantiver 15223 # ======================================================================= # # Nomen eller nomen? @@ -13973,7 +13975,7 @@ SELECT:SeHumHum HUMAN - V IF (-*1 HUMVERB) (NEGATE 0 ("angakkoq" Pl) LINK *1 3PlO BARRIER V) (NEGATE 0 Sem/Hum + ("Danmark") LINK 0 Sem/Geo + ("Danmark") ) (NEGATE 0 ("ittu") LINK 0 (/"ip"\ Gram/IV\ TUQ/l) LINK -1 ("immikkut") OR ("ima") OR ("taama"))#Tamanna qinersinissap eqqarsaatigineqalerneranut aallartitsivoq, tamatumalu kingunerisaanik Inatsisartut immikkut ITTUMIK qinersisoqarnissaa isumaqatigiissutigaat. - +(NEGATE 0 ("inuiaqat") OR (/"inuiak"\ QATE/l)) ; SELECT:Se_illuNot_illoq ("illu") OR ("Illu") ; # "illu" mere sandsynlig end "illoq" SELECT:Se_ataata ("ataata") ; # mere sandsynlig end "ataataq" @@ -14231,6 +14233,7 @@ SELECT:8534CZ ("qulli") IF (0 ("qulleq") LINK *1 ("kuup")); #lamper kan ikke fly REMOVE:8534CÆ ("qaleq") IF (0 ("qaliaq") LINK NEGATE *0 ("nujaq")); REMOVE:8534CØ ("uluk") + SSAQ IF (0 ("uluak")); REMOVE:8534CÅ ("akoq") IF (0 ("aku")); +REMOVE:8534DA (/"inuk"\ IAR\ Der/nv\ Gram/TV\ Gram/Refl\ QATE/l) IF (0 ("inuiaqat") OR (/"inuiak"\ QATE/l)); REMOVE:8534D ("ilik") + VIK + QAR + VIK (0 ("iliveq") + QAR + VIK) ; REMOVE:8599 TUQ + Aeq - Der/vn IF (0 TUQ + Aeq + Der/vn)(NEGATE 1 ("ip")); #De særlige former af TUQ + Aeq på transitiver fulgt af "ippaa" slettes hvis ikke fulgt af "ippaa" @@ -15215,7 +15218,8 @@ REMOVE:5639CT (/"iller"\ Gram/TV\ NAR/l) IF (0 (/"illere"\ NAR/l)); REMOVE:5639CU (/"peqqip"\ Gram/IV\ SI/l) IF (0 ("peqqissi")); REMOVE:5639CV ("amerla") + (/NIQ\ Der/vn\ U/l) IF (0 NIRU); #- Taamaaliornikkut Nunatsinni suliffissat amerlissapput, aamma aningaasat Nunatsinni kaaviiaartut AMERLANERULISSAPPUT nunallu inui namminersortunngorsaanermi peqataatinneqalissallutik, Leif Fontaine allappoq. REMOVE:5639CW (/NIQ\ Der/vn\ U/l) IF (1C CONJ-C + NIRU); #Kalaallisut ordbogit PISOQAANERUSUT nutaanerusullu Oqaasileriffiup nittartagaaniilissapput. - +REMOVE:5639CX (/LIRSUR\ Der/nv\ Gram/IV\ VIP\ Der/vv\ Gram/IV\ VIK/l) IF (0 (/LIRSUR\ Der/nv\ Gram/TV\ HTR\ Der/vv\ Gram/IV\ VIK/l)); + # Enkelte substantiver REMOVE:Int0476 ("pineq") IF (NEGATE *0 ("kamik")) ; #20091103 pineq+QAR vs. pineqar- REMOVE:Int0477 ("illaaq") IF (NEGATE 0 POSSESSUM OR QAR OR LIK); #20091112 soedyrsfostre vist altid moderdyrets @@ -17328,9 +17332,9 @@ SELECT:Sem0137 ("qiteq") + (/^i?Sem/L$/r) IF (0 POSSESSUM3 + OBLIQUE LINK -1 Rel SELECT:Sem0138 ("qiteq") + (/^i?Sem/an/r) IF (0 POSSESSUM3 LINK -1 Rel + HUMAN); #Uima assai anaanama qitiani angalaarput, allaallumi timaani ukioqqortusisimasumi, nuluini eqisaluttuni angalaartarlutik. REMOVE:Sem0140 ("aallar") + (/^i?Sem/run/r) IF (0 (/^i?Sem/start_movement/r) + &&IV_SUBJ& LINK *-1 (/^i?Sem/H.*/r) + Abs + &&IV_SUBJ&) -(NEGATE *-1 ("politeeq") OR ("qatserisartoq")); +(NEGATE *-1 ("politeeq") OR (/"qatser"\ Gram/TV\ HTR\ Der/vv\ Gram/IV\ TAR\ Der/vv\ \(Gram/IV\ \)?TUQ/l)); REMOVE:Sem0141 ("aallar") + (/^i?Sem/start_movement/r) IF (0 (/^i?Sem/run/r) + &&IV_SUBJ& LINK NEGATE *-1 (/^i?Sem/H.*/r) + Abs + &&IV_SUBJ&); -SELECT:Sem0142 ("aallar") + (/^i?Sem/run/r) IF (0 (/^i?Sem/start_movement/r) + &&IV_SUBJ& LINK *-1 ("politeeq") OR ("qatserisartoq") + Abs + &&IV_SUBJ&); +SELECT:Sem0142 ("aallar") + (/^i?Sem/run/r) IF (0 (/^i?Sem/start_movement/r) + &&IV_SUBJ& LINK *-1 ("politeeq") OR (/"qatser"\ Gram/TV\ HTR\ Der/vv\ Gram/IV\ TAR\ Der/vv\ \(Gram/IV\ \)?TUQ/l) + Abs + &&IV_SUBJ&); REMOVE:Sem0143 ("aallar") + (/^i?Sem/start_movement.*/r) IF (0 ("aallar") + (/[12][SP][gl]/r) + (/^i?Sem/run.*/r)); SELECT:Sem0145 ("ulloq") + (/^i?Sem/dur/r) IF (0 ("ulloq") - (/^i?Sem/dur/r) LINK *0 (*)) diff --git a/src/fst/affixes/derivations-inflections.lexc b/src/fst/affixes/derivations-inflections.lexc index 8ebda1f79..dea11b5e7 100644 --- a/src/fst/affixes/derivations-inflections.lexc +++ b/src/fst/affixes/derivations-inflections.lexc @@ -5536,6 +5536,7 @@ LEXICON tptmorf !!= * @CODE@ +GIIT@U.Num.Pl@+Der/nn+PAK+Der/nn+ALUK+Der/nn:%>igiippaaluk@U.Num.Pl@ tup- ; +GIIT@U.Num.Pl@+Der/nn+PAK+Der/nn+SUAQ+Der/nn:%>igiippassuaq@U.Num.Pl@ Z2aqZ ; +GIIT@U.Num.Pl@+Der/nn+SSAQ+Der/nn:%>igiissaq@U.Num.Pl@ tp ; ++GIIT@U.Num.Pl@+Der/nn+SUAQ+Der/nn:%>igiissuaq@U.Num.Pl@ Z2aqZ ; +GIIT@U.Num.Pl@+Der/nn+TUQAQ+Der/nn:%>igiitoqaq@U.Num.Pl@ GEMS ; +GISSAAR+Der/nv+UTE+Der/vn:%>igissaarut tpt ; !undtagelsesvist medtaget i der-lexicon for at forebygge overgenerering med UTE+Der/vn +IAR+Der/nv+UTE+Der/vn:%>iiaat tpt ; @@ -8506,6 +8507,7 @@ LEXICON Z2-Zmorf !!= * @CODE@ derivationsmorfemer til tup- paa K. Jf. Z2-qZmorf +QAR+Der/nv+UTE+Der/vn:%>%TRUNCqaat tpt ; +QATE+Der/nn:%>%TRUNCqat tpt ; +QATE+Der/nn+GIIT+Der/nn:%>%TRUNCqatigiit GIIT ; ++QATE+Der/nn+GIIT@U.Num.Pl@+Der/nn+SUAQ+Der/nn:%>%TRUNCqatigiissuaq@U.Num.Pl@ Z2aqZ ; +QATE+Der/nn+NNGUAQ+Der/nn:%>%TRUNCqatinnguaq Z1nnguaqZ ; +QQURTUUQ+Der/nn:%>%TRUNCqqortooq tp ; +QQURTUUQ+Der/nn+SUAQ+Der/nn:%>%TRUNCqqortoorsuaq Z2aqZ ; @@ -99577,8 +99579,7 @@ LEXICON flex-iv2 !!= * @CODE@ +PALLAK+Der/vn:%>pallak Z2-Zmorf ; !14052020 panie. uinngiarpallannguamilluunniit +PALUK+Der/vn:%>paluk Z2-Zmorf ; +QATE+Der/vn:%>%TRUNCqat tptmorf ; -!20180709 PL QQAAQ+vn slettes helt. Af 983 hits i corpus er 983 forkerte! -!+QQAAQ+Der/vn:%>%TRUNCqqaaq Z1Zmorf ; !livm 170803 +!+QATIGIIT@U.Num.Pl@+Der/vn:%>%TRUNCqatigiit@U.Num.Pl@ tptmorf ; +QQAMMIQ+Der/vn:%>%TRUNCqqammeq Z1Zmorf ; !liv 170127 +QQINNAAQ+Der/vn:%>%TRUNCqqinnaaq Z1Zmorf ; !liv 170331 !+QQISSAAQ+Der/vn:%>%TRUNCqqissaaq Z1Zmorf ; @@ -110591,7 +110592,7 @@ LEXICON Z1eZ !!= * @CODE@ +LIRNGUSAATE@U.Num.Pl@+Der/nv+Gram/Reci:%>lerngusaap@U.Num.Pl@ XIuteXmorf ; +LIRI+Der/nv:%>leri XIi_voqXmorf ; +LIRSUR+Der/nv:%>lersor TV_UTE_gennemgang ; -!+LIRSUR+Der/nv+Gram/IV:%>lersor IV_r_UTE_gennemgang ; !PL20211015 må enten være transitiv eller refleksiv?? ++LIRSUR+Der/nv+Gram/IV:%>lersor IV_r_UTE_gennemgang ; +LISAAR+Der/nv:%>lisaar IV_r_stem ; +LISAAR+Der/nv+UTE+Der/vv:%>lisaarup HTR_UTE_SSAQ_ssi ; +LISAR+Der/nv:%>lisar IV_r_stem ; @@ -114514,12 +114515,9 @@ LEXICON GIIT !!= * @CODE@ +GIP+Der/nv:%>%TRUNCg2ip XIiXmorf_stem ; +GISSAAR+Der/nv:%>%TRUNCg2issaar IV_r_stem ; +GISSAAR+Der/nv+GI+Der/vv:%>%TRUNCg2issaari XIgujoqX ; -!+GIIP@U.Num.Pl@+Der/nv:%>g2iip@U.Num.Pl@ XIiXmorf_stem ; +IAR+Der/nv:%>%TRUNCiar HTR_r_GAQ_i ; +ILATSI+Der/nv:%>%TRUNCilatsi IV_voq ; +IRNIAR+Der/nv+Gram/IV:%>%TRUNCverniar IV_r ; !+IR+Der/nv+NIAR+Der/vv -!+IR+Der/nv+NIAR+Der/vv:%>%TRUNCverniar flex-iv ; -!+IR+Der/nv+NIAR+Der/vv+TAR+Der/vv:%>%TRUNCverniartar flex-iv ; +IRSIR+Der/nv:%>%TRUNCverser XIirXmorf_stem ; +IRUTE+Der/nv:%>%TRUNCverup XIuteXmorf ; +IRUTE+Der/nv:%>%TRUNCverup HTR_UTE_SSAQ_ssi ; diff --git a/src/fst/stems/nouns.lexc b/src/fst/stems/nouns.lexc index 2626c7f56..35e0dd164 100644 --- a/src/fst/stems/nouns.lexc +++ b/src/fst/stems/nouns.lexc @@ -19128,9 +19128,9 @@ nakorsartaaq Z1Zmorf ; nakorsartittoq Z1Zmorf ; nakorsassaq Z1Zmorf ; nakorsavik Z2-Zmorf ; -nakorsiartarfik Z2-Zmorf ; -nakorsiartartoq Z1Zmorf ; -nakorsiartoq Z1Zmorf ; +!nakorsiartarfik Z2-Zmorf ; +!nakorsiartartoq Z1Zmorf ; +!nakorsiartoq Z1Zmorf ; nakortinngitsoortoq Z1Zmorf ; nakortittoq Z1Zmorf ; nakorut tptmorf ; @@ -20894,7 +20894,7 @@ nipikillisoq Z1Zmorf ; nipikisaartoq Z1Zmorf ; nipikitsoralannguaq Z1nnguaqZmorf ; nipilaartoq Z1Zmorf ; -nipilersoqatigiit@U.Num.Pl@:nipilersoqatigiik@U.Num.Pl@ Z2-Zmorf ; +!nipilersoqatigiit@U.Num.Pl@:nipilersoqatigiik@U.Num.Pl@ Z2-Zmorf ; nipilersorpaluk Z2-Zmorf ; nipilersugaq GEMS ; nipilersugassiaq Z1Zmorf ; @@ -27007,11 +27007,11 @@ qatitujaarsuaq Z2aqZmorf ; qatituneq Z2+rZmorf ; qatituneq Z2Zmorf ; qatitusoq Z1Zmorf ; -qatserineq Z2Zmorf ; -qatserisartoq Z1Zmorf ; -qatserisartoqarfik Z2-Zmorf ; -qatserisartuusoq Z1Zmorf ; -qatserisoq Z1Zmorf ; +!qatserineq Z2Zmorf ; +!qatserisartoq Z1Zmorf ; +!qatserisartoqarfik Z2-Zmorf ; +!qatserisartuusoq Z1Zmorf ; +!qatserisoq Z1Zmorf ; qatserut tptmorf ; qatserutaasivik Z2-Zmorf ; qatserutaasoq Z1Zmorf ; diff --git a/src/fst/stems/verbs.lexc b/src/fst/stems/verbs.lexc index ae153618e..aadb5eead 100644 --- a/src/fst/stems/verbs.lexc +++ b/src/fst/stems/verbs.lexc @@ -5079,8 +5079,8 @@ inugiap IV_k_stem ; inugip XIiXmorf_stem ; inuge:inug HTR_schwa_SAQ_nnip ; inugaarup XIuteXmorf ; -inuiap IV_k_stem ; -inuiar HTR_r_GAQ_i ; +inuiap+Orth/Arch:inuiap IV_k_stem ; !kun et ex (Otto Rosing) i corpus +!inuiar HTR_r_GAQ_i ; inuilatsinar IV_r_stem ; inuilatsi XIi_voqXmorf_stem ; inuilli XIi_voqXmorf_stem ;