SFST magyar többesszám és esetek I

A Programozás Wiki wikiből

Makefile

# Build the compiled transducer tobbrag.a with the SFST compiler.
# tobbrag.fst #includes the other .fst sources and reads the lexicon
# noun-reg1.lex, so a change to any of them must trigger a rebuild.
tobbrag.a: tobbrag.fst ninfl1.fst phon1.fst symbols1.fst noun-reg1.lex

# Generic rule: compile an SFST source ($<) into a transducer file ($@).
# Recipe lines must start with a TAB character, not spaces.
%.a: %.fst
	fst-compiler-utf8 $< $@

# clean names a command, not a file; .PHONY keeps it working even if a file
# called "clean" exists. Removes compiled transducers and editor backups.
.PHONY: clean
clean:
	rm -f *.a *~

symbols1.fst

%%%%%%%%%%%%%%% symbols %%%%%%%%%%%%%%%%%%%%
% Character classes used by the alphabet and the vowel-harmony rules.

% consonant letters
#cons# = bcdfghjklmnprstvwxyz
% vowel letters ('w' is a consonant and is already a member of #cons#)
#vowel# = aeiouáéíóúöüőű
% front ("magas") vowels: trigger the front variants of harmonising suffixes
#magas# = eéiíöőüű
% unrounded front vowels
#magas1# = eéií
% back ("mély") vowels: trigger the back variants of harmonising suffixes
#mely# = aáouóú
% rounded back vowels
#mely1# = ouóú
% rounded front vowels
#oe# = öüőű
% a / á only
#aa# = aá


ninfl1.fst

% Noun inflection: maps each analysis tag to the abstract form of its suffix.
% The multi-character symbols inside the suffixes (<AA>, <AAA>, <oooe>,
% <oeoe>, <uue>, <VV>, <EOA>) are archiphonemes; phon1.fst lists their
% possible surface letters in its ALPHABET and picks one by vowel harmony.
%
% The include below is commented out: tobbrag.fst already includes
% symbols1.fst before it includes this file.
%#include "symbols1.fst"

% plural marker: <AA> + k
$plu$ = <pl>:{<AA>k}

% nominative: no suffix
$nom$ =  <nom>:{}
% dative: n + <AA> + k
$dat$ =  <dat>:{n<AA>k}
% accusative: <oeoe> + t
$acc$ =  <acc>:{<oeoe>t}
% illative: b + <AA>
$ill$ =  <ill>:{b<AA>}
% inessive: b + <AA> + n
$ine$ =  <ine>:{b<AA>n}
% elative: b + <oooe> + l
$ela$ =  <ela>:{b<oooe>l}
% allative: h + <oeoe> + z
$all$ =  <all>:{h<oeoe>z}
% adessive: n + <AAA> + l
$ade$ =  <ade>:{n<AAA>l}
% ablative: t + <oooe> + l
$abl$ =  <abl>:{t<oooe>l}
% superessive: <oeoe> + n
$sup$ =  <sup>:{<oeoe>n}
% delative: r + <oooe> + l
$del$ =  <del>:{r<oooe>l}
% terminative: invariant "ig"
$ter$ =  <ter>:{ig}
% essive: <uue> + l
$ess$ =  <ess>:{<uue>l}
% formal: invariant "ként"
$for$ =  <for>:{ként}
% temporal: invariant "kor"
$tem$ =  <tem>:{kor}
% causal: invariant "ért"
$cau$ =  <cau>:{ért}
% sociative: <EOA> + st + <uue> + l
$soc$ =  <soc>:{<EOA>st<uue>l}
% factive: <VV> + <AAA>
$fac$ =  <fac>:{<VV><AAA>}
% distributive: <oeoe> + "nként"
$dis$ =  <dis>:{<oeoe>nként}

% any single case suffix
$case$ = $nom$ | $dat$ | $acc$ | $ill$ | $ine$ | $ela$ | $all$ | $ade$ |\
         $abl$ | $sup$ | $del$ | $ter$ | $ess$ | $for$ | $tem$ |\
         $cau$ | $soc$ | $fac$ | $dis$

% alias for the plural marker; not referenced by the other files shown on this page
$PLU$ = $plu$

% full ending: optional plural followed by exactly one case (the case is
% obligatory; a bare stem is analysed with <nom>)
$CASEP$ = $plu$? $case$

phon1.fst

% The include below is commented out: tobbrag.fst already includes
% symbols1.fst before it includes this file.
%#include "symbols1.fst"
% Alphabet for the two-level vowel-harmony rules that follow.
% Plain letters and <Noun> map to themselves; each archiphoneme is paired
% with every surface letter it may be realised as:
%   <AA>   -> e | a          <AAA>  -> é | á
%   <oooe> -> ó | ő          <oeoe> -> e | ö | o
%   <uue>  -> u | ü          <VV>   -> v | (deleted)
%   <EOA>  -> e | o | a | ö | (deleted)
% The <=> rules below restrict which of these pairs is legal in which context.
ALPHABET = [#cons#] [#vowel#] <AA>:e <AA>:a  \
         <AAA>:é <AAA>:á <oooe>:ó <oooe>:ő \
         <oeoe>:e <oeoe>:ö <oeoe>:o  \
         <uue>:u <uue>:ü <VV>:v <VV>:<>  \
         <EOA>:e <EOA>:o <EOA>:a <EOA>:ö <EOA>:<> \
         <Noun>
% Harmony for <AA>: realised as "a" exactly when the last stem vowel is a
% back vowel, and as "e" exactly when it is a front vowel.
% The consonant run between that vowel and <Noun> is [#cons#]* (zero or
% more), matching the other harmony rules in this file. With [#cons#]+ the
% context never matched vowel-final stems (kapa, káka, kefe), so <AA> had
% no legal realisation and every form containing it was rejected for them.
% NOTE(review): for vowel-final stems the linking-vowel use of <AA> (plural
% <AA>k) still needs a vowel-deletion step; $Del_pV$ below currently passes
% everything through unchanged.
$As$ =  ([#mely#] [#cons#]* <Noun> .*) <AA> <=>  a
$As2$ = ([#magas#] [#cons#]* <Noun> .*) <AA> <=>  e
% pass-through transducer over the current alphabet
$Del_pV$ = .*
% both <AA> constraints must hold simultaneously
$ma1$ = $As$ &  $As2$ 
% Remaining vowel-harmony rules. Every context has the same shape:
% a vowel of the given class, any number of consonants, <Noun>, then
% anything -- i.e. the class of the last stem vowel decides the realisation.

% <oooe> is "ó" exactly after a back-vowel stem (the alphabet leaves "ő" as
% the only other option)
$Bs1$ =  ([#mely#] [#cons#]* <Noun> .*) <oooe> <=>  ó
$ma2$ = $Bs1$ 
% <AAA> is "á" exactly after a back-vowel stem (otherwise "é")
$Cs1$ =  ([#mely#] [#cons#]* <Noun> .*) <AAA> <=>  á
$ma3$ = $Cs1$ 
% <oeoe>: "o" after back vowels, "e" after unrounded front vowels,
% "ö" after rounded front vowels
$Ds1$ =  ([#mely#] [#cons#]* <Noun> .*) <oeoe> <=>  o
$Ds2$ =  ([#magas1#] [#cons#]* <Noun> .*) <oeoe> <=>  e
$Ds3$ =  ([#oe#] [#cons#]* <Noun> .*) <oeoe> <=>  ö
$ma4$ = $Ds1$ & $Ds2$ & $Ds3$ 
% <uue> is "u" exactly after a back-vowel stem (otherwise "ü")
$Es1$ =  ([#mely#] [#cons#]* <Noun> .*) <uue> <=>  u
$ma5$ = $Es1$ 
% <EOA>: "o" after o/u/ó/ú, "a" after a/á, "e" after unrounded front vowels,
% "ö" after rounded front vowels. No rule here restricts the <EOA>:<> pair.
$Fs1$ =  ([#mely1#] [#cons#]* <Noun> .*) <EOA> <=>  o
$Fs2$ =  ([#aa#] [#cons#]* <Noun> .*) <EOA> <=>  a
$Fs3$ =  ([#magas1#] [#cons#]* <Noun> .*) <EOA> <=>  e
$Fs4$ =  ([#oe#] [#cons#]* <Noun> .*) <EOA> <=>  ö
$ma6$ = $Fs1$ & $Fs2$ & $Fs3$ & $Fs4$ 

% All harmony constraints intersected, then composed with $Del_pV$.
% NOTE(review): there is no rule for <VV> in this file, so both of its
% alphabet pairs (v and deletion) remain allowed everywhere.
$ma$ = ($ma1$ & $ma2$ & $ma3$ & $ma4$ & $ma5$ & $ma6$) || $Del_pV$

% Stem-final lengthening: a -> á and e -> é when the letter stands directly
% before <Noun> and at least one more symbol follows that is not "k".
% The [^k] excludes what follows <Noun> from starting with k; in ninfl1.fst
% the k-initial endings are "ként" and "kor".
% The alphabet is redefined here, so "." in the rules below ranges over plain
% letters and the tag symbols listed on the next line.
ALPHABET = [#cons#] [#vowel#] <Verb><Noun><Adj> <pl>
$a-to-aa$ = {a}:{á} ^-> (__ [<Noun>] [^k].*)
$e-to-ee$ = {e}:{é} ^-> (__ [<Noun>] [^k].*)
% delete pos tags: with this alphabet the part-of-speech symbols map to the
% empty string, so ".*" copies the letters and drops the tags
ALPHABET = [#cons#] [#vowel#] [<Verb><Noun><Adj>]:<>
$delete-POS$ =  .*


% Complete phonology cascade, applied in this order:
% vowel harmony, a-lengthening, e-lengthening, tag removal.
$phon$ = $ma$ || $a-to-aa$ ||  $e-to-ee$ || $delete-POS$


tobbrag.fst

%%%%%%%%%%%%%%%%% simplified HU plural+cases %%%%%%%%%%%%%%%%%%%%%%%%%%
% Top-level grammar: lexicon stems + <Noun> tag + inflectional ending,
% passed through the phonological rules.
% Include order matters: symbols1.fst defines the character classes used by
% phon1.fst, and the includes must precede the use of $CASEP$ and $phon$.
#include "symbols1.fst"
#include "phon1.fst"
#include "ninfl1.fst"

% every stem from the lexicon file, followed by the <Noun> tag
$morph$ = "noun-reg1.lex" <Noun>
% dump the intermediate transducer (stems only) to a file
$morph$ >> "morph9.a"
% append optional plural + obligatory case
$morph$ = $morph$ $CASEP$
% dump the intermediate transducer (stems + abstract endings) to a file
$morph$ >> "morph10.a"
% resolve archiphonemes, lengthen stem-final a/e, remove the POS tag
$morph$ = $morph$ || $phon$
% the final expression is the transducer written to the compiler's output file
$morph$

%%%%%%%%%%%%%%%%%%% noun-reg1.lex %%%%%%%%%%%%%%%%%%%
% Copy of the lexicon contents, for reference only:
% kapa
% káka
% kefe
% kör
% vér
% halász
% baj
% bor
% tyúk

noun-reg1.lex

kapa
káka
kefe
kör
vér
halász
baj
bor
tyúk


Lásd még