SFST magyar főnevek jajeae ingadozó

A Programozás Wiki wikiből
%%%%%%%%%%%%%%% jaje1.fst %%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%   Hungarian noun dob, had, rác  jaje ae %%%%%%%%%%%%%%
% Chooses between the possessive ending with j ("jaje", e.g. dob-ja) and the
% ending without j ("ae", e.g. rác-a) from the consonant that ends the stem:
%   stem ends in a #plosiv# consonant     -> j is kept      (jaje)
%   stem ends in a #nonplosiv# consonant  -> j is dropped   (ae)
%   any other stem-final consonant        -> both forms     (Hungarian "ingadozó" = vacillating)
%
% Symbol classes. Only #cons# and #vowel# are referenced by the rules visible
% in this file; the remaining classes are defined but not used here.
#cons# = bcdfghjklmnpqrstvwxyz
#vowel# = aeiouáéíóúöüőű
#letter# = #cons##vowel#
% Hungarian "magas" = front vowels, "mély" = back vowels.
#magas# = eéiíöőüű
#mely# = aáouóú
% Rounded front vowels.
#oeue# =öüőű
% Unrounded front vowels.
#ei# = eéií
% #ei# plus the two linking-vowel placeholder symbols.
% NOTE(review): "magasító" presumably means "fronting trigger" - confirm the intended use.
#magasito# = #ei#<oeoez1><OEOE1>
%%%%%%%%%%%%%%% phonetic rules %%%%%%%%%%%%%%%%%%%%
% Tags that can appear after <Noun> on the intermediate level.
% NOTE(review): <soc> is listed twice; presumably harmless in a symbol set, but one
% of the two may have been meant to be a different tag - confirm.
#marker# = <soc><tpos><tpl><soc><call><gen><pl>
% Alphabet used while the two-level rules below are compiled: letters, <Noun> and
% the markers map to themselves; each placeholder symbol maps to the surface
% realisations listed here (<J> is either j or deleted).
% NOTE(review): only the back-vowel realisation "a" is listed for <AE>, <OEOE> and
% <EOA>, which matches the back-vowel example stems (rác, had, dob); front-vowel
% stems would need additional pairs.
% NOTE(review): <oeoez1>:<e> maps to the multi-character symbol <e>, not to the
% letter e - confirm that this is intended.
ALPHABET = [#cons#] [#vowel#]      \
          <OEOE1>:<>               \ % om, em, öm
          <plvs>:<> \
          <J>:j <J>:<>    \
          <AE>:a    \
          <OEOE>:a  \ 
          <EOA>:a <EOA>:<>         \ % e-stül, o-stul, ö-stül
          <oeoez1>:<e>             \ % lov-a-k, köv-e-k kör-ö-k
          <Noun> [#marker#]
% Any sequence of pairs from the alphabet above; composed after the rules in $ma$
% so that every placeholder is realised or deleted as licensed by that alphabet.
$Del_pV$ = .*

% sounds
% Two-level rule for the <EOA> placeholder: it is realised as e exactly when the
% left context contains <Noun>, later a consonant immediately followed by <soc>.
% NOTE(review): the pair <EOA>:e is not listed in the ALPHABET above, and <EOA> is
% only produced by $soc$, which is not part of the active $CASEP$ - confirm
% this rule before enabling the sociative case.
$Fs2$ =  ( <Noun>.*[#cons#][<soc>].*) <EOA> <=>  e 
$ma2$ =  $Fs2$ 
% Stem-final consonant classes that decide what happens to the <J> placeholder.
#plosiv# = bghpt
#nonplosiv# = cjsvz
% Consonants of the vacillating class. #ing# is not referenced by any rule below:
% a stem-final consonant that is in neither class above is simply matched by
% neither left context.
#ing# = dfmnry
% $j1$: <J> is deleted exactly when the stem (the symbols right before <Noun>) ends
% in non-plosive consonant(s).
$j1$ =  ([#nonplosiv#]+ <Noun> .*) <J> <=>  <> 
% $j2$: <J> is realised as j exactly when the stem ends in plosive consonant(s).
$j2$ =  ([#plosiv#]+ <Noun> .*) <J> <=>  j 
% The two rules are joined with union (|), not intersection (&), on purpose.
% For a stem in one of the two classes both rules admit the same single form.
% For a stem in neither class, $j1$ alone forbids deleting <J> (giving the j form)
% and $j2$ alone forbids realising it as j (giving the form without j); the union
% keeps both forms (had -> hadja / hada), whereas the intersection would keep none.
$rp3$ = $j1$ | $j2$   %%%%%%%%%%%%%%%%%%%%%%%%% This is the point, | instead of &


% Complete phonetic component: both rule sets must hold, then the result is
% composed with $Del_pV$.
$ma$ = ( $ma2$ & $rp3$) || $Del_pV$
%$ma$ = ( $ma2$ ) || $Del_pV$


%$ma$ = $ma2$

% Alphabet in which letters map to themselves while <Noun> and every marker are
% deleted; .* over this alphabet is the final clean-up transducer.
ALPHABET = [#cons#] [#vowel#] [<Noun>#marker#]:<>
$delete-POS$ =  .*
% Alphabet switched back to identity pairs (tags are kept) for the replace rules below.
ALPHABET = [#cons#] [#vowel#] <Noun> [#marker#] 
% ű becomes űve when it is directly followed by <Noun> and a <soc> or <pl> marker
% occurs later in the word.
$ue-to-uev$ = {ű}:{űve} ^-> (__ [<Noun>].*[<soc><pl>])
% e is deleted when it is directly followed by <Noun> and then <gen> or é.
$eee-to-ev$ = {e}:{<>} ^-> (__ [<Noun>] [<gen>é].*)
% ő becomes öve when it is directly followed by <Noun> and a <soc> or <pl> marker
% occurs later in the word.
$oe-to-oev$ = {ő}:{öve} ^-> (__ [<Noun>].*[<soc><pl>])


%%%% inflection of nouns (subset) %%%%%%%%%%%%
% Each suffix transducer maps an analysis tag to a marker plus the suffix letters
% and placeholder symbols that the phonetic rules resolve later.
% Only $posss3$ is active in this file (see $poss$ and $CASEP$ below); the other
% suffixes are defined but not used by the build.
$plu$ = <pl>:{<pl>ak}

% Possessive suffixes. Judging by the example forms in the trailing comments, the
% tag names presumably encode possessor number (s/p) and person (1-3), with a
% final p for plural possessed noun - confirm against the tag set in use.
% NOTE(review): <UUE> and <UUE1> have no pair in the ALPHABET of the phonetic
% rules, so $possp1$ and $possp3$ need alphabet entries before they can be enabled.
%$posss1$ =  {<posss1>}:{m} 
$posss1$ =  {<posss1>}:{<tpos><plvs><OEOE1>m}         % om, em, öm
$posss2$ =  {<posss2>}:{<tpos><plvs><OEOE1>d}         % od, ed, öd
$posss3$ =  {<posss3>}:{<tpos><plvs><J><AE>}          % ja, je
$possp1$ =  {<possp1>}:{<tpos><plvs><UUE>nk}          % unk, ünk
$possp2$ =  {<possp2>}:{<tpos><plvs><OEOE1>t<OEOE>k}  % otok, etek
$possp3$ =  {<possp3>}:{<tpos><plvs><J><UUE1>k}       % juk, jük
$posss1p$ =  {<posss1p>}:{<tpos><plvs><J><AE>im}      % jaim
$posss2p$ =  {<posss2p>}:{<tpos><plvs><J><AE>id}      % jaid
$posss3p$ =  {<posss3p>}:{<tpos><plvs><J><AE>i}       % jai
$possp1p$ =  {<possp1p>}:{<tpos><plvs><J><AE>ink}     % jaink
$possp2p$ =  {<possp2p>}:{<tpos><plvs><J><AE>it<OEOE>k}  % jaitok
$possp3p$ =  {<possp3p>}:{<tpos><plvs><J><AE>ik}      % jaik

% Restricted to the 3rd person singular possessor for this demonstration; the
% full union of all possessive suffixes is kept below, commented out.
$poss$ =  $posss3$ 
%$poss$ =  $posss1$ | $posss2$ | $posss3$ | $possp1$ | $possp2$ | $possp3$ |\
%         $posss1p$ | $posss2p$ | $posss3p$ | $possp1p$ | $possp2p$ | $possp3p$

% Case suffixes; of these only $nom$ is referenced (through $case$), and $case$
% itself appears only in the commented-out $CASEP$ definitions.
$gen$ =  {<gen>}:{<gen>é} 
$nom$ =  <nom>:{}
$dat$ =  <dat>:{nek}                 % nak, nek
$soc$ =  <soc>:{<soc><EOA>stül} 
$case$ = $nom$ 
% Inflection actually appended to the stems: just the $posss3$ suffix. The two
% fuller variants are kept commented out.
%$CASEP$ =( ($plu$?  | $poss$? ) $gen$? $case$ ) | $soc$
%$CASEP$ =( ($plu$?  | $poss$? ) $gen$? $case$ ) 
$CASEP$ = $posss3$

%%%%%%%%%%% build transducer
% Pipeline: lexicon stems + <Noun> -> inflection suffix -> phonetic rules ->
% replace rules -> tag deletion. After each step the intermediate transducer is
% written to a morphN.a file so that the stages can be inspected separately.
$morph$ =  "jaje-reg.lex" <Noun>                  % read in the stem lexicon and append <Noun>
$morph$ >> "morph1.a"
$morph$ = $morph$ $CASEP$                       % add inflect
$morph$ >> "morph2.a"
$morph$ = $morph$ || $ma$              % apply the phonetic (two-level) rules
$morph$ >> "morph3.a"
$morph$ = $morph$ || $ue-to-uev$ ||$oe-to-oev$  % apply the stem-final ű / ő replace rules
$morph$ >> "morph4.a"
$morph$ = $morph$ || $eee-to-ev$  % apply the stem-final e deletion rule
$morph$ >> "morph5.a"
$morph$ =  $morph$ || $delete-POS$              %  clean up: delete <Noun> and the markers
% The final expression is the transducer produced by this program.
$morph$

%%%%%%%%%%% jaje-reg.lex %%%%%%%%%%%
% rác, had, dob
%%%%%%%%%%%%  result %%%%%%%%
% ráca  sav:'<Noun><posss3>'   % ae
% dobja  sav:'<Noun><posss3>'  % jaje
% hadja  sav:'<Noun><posss3>'  % both
% hada  sav:'<Noun><posss3>'   % both

Lásd még