SFST magyar főnevek va-ve-val-vel

A Programozás Wiki wikiből
%%%%%%%%%%%%%% raggenpl2.fst %%%%%%%%%%%%%%%%%%%
% http://wiki.prog.hu/wiki/SFST_magyar_főnévragozás vá-vé-val-vel
%%%%%%%%%%%%%% Hungarian nouns vá,vé, val, vel endings (case: ins, fac)  %%%%%%%%%%%
% ins val, vel
% fac vá, vé
%%% words
% tar
% né
%%%% rule
% tar-rá
% tar-om-má
% né-vé
% tar-é-vá
% tar-ok-ká
% nék-ké
% né-é-vé
% ....
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Vowel harmony is handled by substitution variables whose surface
% value is chosen from the FrontVowel / BackVowel class of the stem vowel
%%%%%%%%%%%%%%% symbols %%%%%%%%%%%%%%%%%%%%%%
% Character classes shared by all rules below.
% Consonant letters (single letters only).
#cons# = bcdfghjklmnprstvwxyz
% Vowel letters. w is NOT listed here: it already belongs to #cons#, and a
% letter that is in both classes would match the vowel context of $rp3$
% (keep the v of the suffix) and the consonant context of $duplicate1$
% (double the stem-final consonant) at the same time.
#vowel# = aeiouáéíóúöüőű
% Every letter: consonants followed by vowels.
#letter# = #cons##vowel#
% Harmony classes used in the left contexts of the two-level rules.
#FrontVowel# = eéiíöőüű
#FrontVowelUnrounded# = eéií
#BackVowel# = aáouóú
% Back vowels without a/á (not referenced by the rules visible in this file).
#BackVowel1# = ouóú
#FrontVowelRounded# = öüőű
%%%%%%%%%%%%%%% phon rules %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Overview of all substitution variables used below. %%%%%%%%%
% Each one must be resolved separately, in its own rule. %%%%%
% ALPHABET = [#cons#] [#vowel#] <AA>:e <AA>:a  \
%          <AAA>:é <AAA>:á \
%          <oeoez>:o <oeoez>:e <oeoez>:ö <oeoez>:<> \
%          <VV>:v <VV>:<>  \
%          <OEOE>:o <OEOE>:e <OEOE>:ö \
%          <OEOE1>:o <OEOE1>:e <OEOE1>:ö <OEOE1>:<> \
%          <Noun> <dup> <ins> <fac>
% First substitution variable: <AA> is resolved here.
% The alphabet lets <AA> surface as e or a; all other listed symbols,
% including the still unresolved placeholders, map to themselves.
ALPHABET = [#cons#] [#vowel#] <Noun> <dup> <ins> <fac> \
	 <AAA> <oeoez> <VV> <OEOE> <OEOE1> <AA>:[ea]
% <AA> becomes a if and only if the material before it contains a back vowel,
% then only consonants up to <Noun>, then anything. Otherwise the only pair
% left in the alphabet is <AA>:e (compare tarral / nével in the sample output).
$ma0$ =  ([#BackVowel#] [#cons#]* <Noun> .*) <AA> <=>  a 

% Second substitution variable: <AAA> is resolved here.
% <AA> is no longer in the alphabet because $ma0$ has already replaced it;
% <AAA> may surface as é or á.
ALPHABET = [#cons#] [#vowel#] <Noun> <dup> <ins> <fac> \
         <oeoez> <VV> <OEOE> <OEOE1> <AAA>:[éá]
% <AAA> becomes á if and only if the last stem vowel before <Noun> is a back
% vowel; otherwise only <AAA>:é remains (compare tarrá / névé).
$ma1$ =  ([#BackVowel#] [#cons#]* <Noun> .*) <AAA> <=>  á 

% Third substitution variable: <oeoez> (linking vowel of the plural) is resolved here.
% It may surface as o, e, ö or as nothing (<>).
ALPHABET = [#cons#] [#vowel#] <Noun> <dup> <ins> <fac> \
         <VV> <OEOE> <OEOE1> <oeoez>:[oeö<>]
% Each rule requires at least one consonant between the stem vowel and <Noun>
% ([#cons#]+), so a stem ending in a vowel matches none of the three contexts
% and only the empty realisation is left (compare tarok / nék).
% back vowel + consonant(s)            -> o
$As2$ =  ([#BackVowel#] [#cons#]+ <Noun> .*) <oeoez> <=>  o 
% rounded front vowel + consonant(s)   -> ö
$As3$ =  ([#FrontVowelRounded#] [#cons#]+ <Noun> .*) <oeoez> <=>  ö 
% unrounded front vowel + consonant(s) -> e
$As4$ =  ([#FrontVowelUnrounded#] [#cons#]+ <Noun> .*) <oeoez> <=>  e 

% All three constraints must hold at once, hence the intersection.
$ma2$ = ($As2$ & $As3$ & $As4$) 

% Fourth substitution variable: <OEOE> is resolved here.
% It may surface as o, e or ö; unlike <oeoez> it has no empty realisation.
% NOTE(review): none of the suffix definitions visible in this file emits
% <OEOE>, so this rule presumably only matters for suffixes defined elsewhere.
ALPHABET = [#cons#] [#vowel#] <Noun> <dup> <ins> <fac> \
         <VV> <OEOE1> <OEOE>:[oeö]
% back vowel (optionally followed by consonants) before <Noun>            -> o
$oeoe1$ =  ([#BackVowel#] [#cons#]* <Noun> .*) <OEOE> <=>  o 
% unrounded front vowel (optionally followed by consonants) before <Noun> -> e
$oeoe2$ = ([#FrontVowelUnrounded#] [#cons#]* <Noun> .*) <OEOE> <=>  e 
% There is no explicit rule for ö; it is what remains when neither context matches.
$rp1$ = $oeoe1$ &  $oeoe2$  

% Fifth substitution variable: <OEOE1> (linking vowel of the possessive -m) is resolved here.
% It may surface as o, e, ö or as nothing (<>).
ALPHABET = [#cons#] [#vowel#] <Noun> <dup> <ins> <fac> \
         <VV> <OEOE1>:[oeö<>]
% As with <oeoez>, every context demands at least one stem-final consonant,
% so vowel-final stems get the empty realisation (compare tarom / ném).
$oeoe11$ =  ([#BackVowel#] [#cons#]+ <Noun> .*) <OEOE1> <=>  o 
$oeoe12$ = ([#FrontVowelUnrounded#] [#cons#]+ <Noun> .*) <OEOE1> <=>  e 
$oeoe13$ = ([#FrontVowelRounded#] [#cons#]+ <Noun> .*) <OEOE1> <=>  ö 
% Intersection: all three constraints apply simultaneously.
$rp2$ = $oeoe11$ &  $oeoe12$  & $oeoe13$

% Sixth substitution variable: <VV> (the v of -val/-vel/-vá/-vé) is resolved here.
% All vowel placeholders are gone from the alphabet by now; <VV> surfaces as v or as nothing.
ALPHABET = [#cons#] [#vowel#] <Noun> <dup> <ins> <fac> \
	 <VV>:[v<>]
% <VV> is kept as v if and only if one of these left contexts matches:
%   1. a vowel directly before <Noun>, then only non-consonant symbols (stem ends in a vowel: né -> névé)
%   2. <Noun> directly followed by a vowel, then only non-consonant symbols (tar-é -> tarévá)
%   3. <Noun>, anything, a vowel, only non-consonant symbols, <dup>, anything
% Otherwise <VV> is deleted and the preceding consonant is doubled later by the duplicate rules.
% NOTE(review): concatenation binds tighter than |, so "<dup> .*" presumably belongs
% to the third alternative only - confirm that this is intended.
$rp3$ = (([#vowel#]<Noun>[^#cons#]*) | (<Noun>[#vowel#][^#cons#]*)| (<Noun>.*[#vowel#][^#cons#]*) <dup> .*)  <VV> <=>  v % tarévá, lévé

% Consonant doubling for the vá/vé/val/vel endings, built on an agreement variable.
% #=D# ranges over the consonants; every occurrence inside one expression takes the same value.
#=D# = #cons#
% $T$ therefore maps a single consonant to two copies of that same consonant.
$T$ = {[#=D#]}:{[#=D#][#=D#]}
% No placeholder is left in the alphabet: all of them have been resolved by the rules above.
ALPHABET = [#cons#] [#vowel#] <Noun> <fac> <dup><ins>
% tar -> tarrá
% Doubles the consonant that stands directly before <Noun> when only non-letter
% symbols separate <Noun> from <dup>, i.e. the suffix attaches straight to the stem.
$duplicate1$ = $T$ ^-> ( __ <Noun> [^#letter#]* <dup> .*)
% tarok -> tarokká
% Doubles a consonant located after <Noun> when only non-vowel symbols lie
% between it and <dup>, i.e. the suffix follows another consonant-final suffix.
$duplicate2$ = $T$ ^-> ( <Noun>  .* __ [^#vowel#]* <dup> .*)

% Lengthening of a stem-final a to á and e to é.
% The vowel must stand directly before <Noun>, at least one more symbol must
% follow, and the first following symbol must not be k.
% NOTE(review): excluding k also blocks lengthening before a plural -k that is
% attached without a linking vowel - confirm that this is intended.
$a_to_aa$ = {a}:{á} ^-> (__ [<Noun>] [^k].*)
$e_to_ee$ = {e}:{é} ^-> (__ [<Noun>] [^k].*)
% Remove the helper markers from the surface string.
% In this alphabet letters map to themselves while <Noun> and <dup> map to the
% empty symbol, so "any sequence of alphabet pairs" copies the letters and drops the markers.
ALPHABET = [#cons#] [#vowel#] [<Noun><dup>]:<>
$delete_POS$ =  .*

%%%%%%%%%%%%%%%%%% inflections %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% A deliberately tiny sample of Hungarian noun suffixes, just enough
% to show the principle. Each definition pairs an analysis tag (left)
% with surface material (right); the placeholders in the surface part
% are resolved afterwards by the phonological rules.
% The stem must end in <Noun>, which marks the word's end.

% Number: plural -k preceded by the linking-vowel placeholder <oeoez>
$plu$ = {<pl>}:{<oeoez>k}

% Possessive: -m (tag <posss1>) preceded by the linking-vowel placeholder <OEOE1>
$posss1$ = {<posss1>}:{<OEOE1>m}
$poss$ = $posss1$

% Possessor marker -é
$gen$ = {<gen>}:{é}

% Cases: nominative has no surface material; instrumental and factive start
% with <dup> (doubling trigger) and <VV> (the v that may assimilate)
$nom$ = <nom>:<>
$ins$ = {<ins>}:{<dup><VV><AA>l}
$fac$ = {<fac>}:{<dup><VV><AAA>}
$case$ = $nom$ | $ins$ | $fac$

% Suffix slots in order: optional plural or possessive, optional -é, then an obligatory case
% (the original example, using the dative: ház-aknak ház-amnak ház-ak-é(i)nak ház-am-é(i)nak ház-nak)
$CASEP$ = ($plu$ | $poss$)? $gen$? $case$

%%%%%%%%%%%%%%%%%%%%%%% HU morphology for nouns%%%%%%%%%%%%%%%%%%
%%% The rules are composed onto the word list one after another, %
%%% which the author describes as memory efficient and fast to   %
%%% compile.                                                     %
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Read the stems from the lexicon file and append the <Noun> end-of-stem marker.
$morph$ = "noun-reg001.lex" <Noun>
% Attach every admissible suffix sequence.
$morph$ = $morph$ $CASEP$
$morph$ >> "morph1.a"   % Debug dump: check that all case suffixes are present
% Resolve the placeholders in the order in which their alphabets were narrowed:
% <AA>, <AAA>, <oeoez>, <OEOE>, <OEOE1>, <VV>.
$morph$ = $morph$ || $ma0$
$morph$ = $morph$ || $ma1$
$morph$ = $morph$ || $ma2$
$morph$ = $morph$ || $rp1$
$morph$ = $morph$ || $rp2$
$morph$ = $morph$ || $rp3$
% Double the consonant before <dup>: first stem-final, then suffix-final.
$morph$ = $morph$ || $duplicate1$
$morph$ = $morph$ || $duplicate2$
% Lengthen stem-final a/e; this must happen while <Noun> is still present.
$morph$ = $morph$ || $a_to_aa$
$morph$ = $morph$ || $e_to_ee$
% Finally strip the <Noun> and <dup> markers from the surface side.
$morph$ = $morph$ || $delete_POS$
% The last expression is the transducer produced by the program.
$morph$

%%%%%%%%%%% noun-reg001.lex %%%%%%%%%%%%%%%%%%%%
% tar
% né
%%%%%%%%%% fst-generate raggenpl2.a | interpret1.pl %%%%%%%%%%%%%
% né  sav:'<Noun><nom>' 
% tar  sav:'<Noun><nom>' 
% néé  sav:'<Noun><gen><nom>' 
% taré  sav:'<Noun><gen><nom>' 
% ném  sav:'<Noun><posss1><nom>' 
% nék  sav:'<Noun><pl><nom>' 
% névé  sav:'<Noun><fac>' 
% tarrá  sav:'<Noun><fac>' 
% tarom  sav:'<Noun><posss1><nom>' 
% tarok  sav:'<Noun><pl><nom>' 
% néévé  sav:'<Noun><gen><fac>' 
% némé  sav:'<Noun><posss1><gen><nom>' 
% néké  sav:'<Noun><pl><gen><nom>' 
% nével  sav:'<Noun><ins>' 
% tarral  sav:'<Noun><ins>' 
% tarévá  sav:'<Noun><gen><fac>' 
% taromé  sav:'<Noun><posss1><gen><nom>' 
% taroké  sav:'<Noun><pl><gen><nom>' 
% néével  sav:'<Noun><gen><ins>' 
% némmé  sav:'<Noun><posss1><fac>' 
% nékké  sav:'<Noun><pl><fac>' 
% taréval  sav:'<Noun><gen><ins>' 
% tarommá  sav:'<Noun><posss1><fac>' 
% tarokká  sav:'<Noun><pl><fac>' 
% némévé  sav:'<Noun><posss1><gen><fac>' 
% némmel  sav:'<Noun><posss1><ins>' 
% nékévé  sav:'<Noun><pl><gen><fac>' 
% nékkel  sav:'<Noun><pl><ins>' 
% taromévá  sav:'<Noun><posss1><gen><fac>' 
% tarommal  sav:'<Noun><posss1><ins>' 
% tarokévá  sav:'<Noun><pl><gen><fac>' 
% tarokkal  sav:'<Noun><pl><ins>' 
% némével  sav:'<Noun><posss1><gen><ins>' 
% nékével  sav:'<Noun><pl><gen><ins>' 
% taroméval  sav:'<Noun><posss1><gen><ins>' 
% tarokéval  sav:'<Noun><pl><gen><ins>' 
%%%%%% do.sh%%%%%%%%%
% fst-compiler-utf8 raggenpl2.fst raggenpl2.a
% fst-generate raggenpl2.a >/tmp/raggenpl2
% ...

Lásd még[szerkesztés]