@@ -548,13 +548,27 @@ def process_node(self, node):
548548 'NumForm' : ['Digit' , 'Roman' ]
549549 })
550550 else :
551+ if node .feats ['NumType' ] == 'Sets' :
552+ # 'jedny', 'dvoje', 'troje', 'čtvery'
553+ # Number should perhaps be only Plur because the counted noun will be Plur.
554+ # Gender is not annotated in PDT but there are different forms ('jedni' vs. 'jedny',
555+ # and in Old Czech also 'dvoji' vs. 'dvoje'), so we should allow Gender (and Animacy).
556+ self .check_required_features (node , ['NumType' , 'NumForm' , 'Number' , 'Case' ])
557+ self .check_allowed_features (node , {
558+ 'NumType' : ['Sets' ],
559+ 'NumForm' : ['Word' ],
560+ 'Gender' : ['Masc' , 'Fem' , 'Neut' ],
561+ 'Animacy' : ['Anim' , 'Inan' ],
562+ 'Number' : ['Sing' , 'Dual' , 'Plur' ],
563+ 'Case' : ['Nom' , 'Gen' , 'Dat' , 'Acc' , 'Voc' , 'Loc' , 'Ins' ]
564+ })
551565 # 'jeden' has Gender, Animacy, Number, Case: jeden, jedna, jedno, jednoho, jednomu, jednom, jedním, jedné, jednu, jednou, jedni, jedny, jedněch, jedněm, jedněmi.
552566 # 'dva', 'oba' have Gender, Number=Dual(Plur in modern Czech), Case: dva, dvě, dvou, dvěma.
553567 # 'tři', 'čtyři' have Number=Plur, Case: tři, třech, třem, třemi.
554568 # 'pět' and more have Number=Plur, Case: pět, pěti.
555569 # 'půl' has no Number and Case, although it behaves syntactically similarly to 'pět' (but genitive is still 'půl', not '*půli').
556570 # 'sto', 'tisíc', 'milión', 'miliarda' etc. have Gender (+ possibly Animacy) and Number (depending on their form).
557- if node .lemma == 'jeden' :
571+ elif node .lemma == 'jeden' :
558572 self .check_required_features (node , ['NumType' , 'NumForm' , 'Number' , 'Case' ])
559573 self .check_allowed_features (node , {
560574 'NumType' : ['Card' ],
@@ -594,7 +608,7 @@ def process_node(self, node):
594608 elif re .match (r'^(sto|tisíc|.+ili[oó]n|.+iliarda)$' , node .lemma ):
595609 self .check_required_features (node , ['NumType' , 'NumForm' , 'Number' , 'Case' ])
596610 self .check_allowed_features (node , {
597- 'NumType' : ['Card' , 'Sets' ],
611+ 'NumType' : ['Card' ],
598612 'NumForm' : ['Word' ],
599613 'Gender' : ['Masc' , 'Fem' , 'Neut' ],
600614 'Animacy' : ['Anim' , 'Inan' ],
@@ -610,7 +624,7 @@ def process_node(self, node):
610624 # On the other hand, we may want to allow Dual for "stě".
611625 self .check_required_features (node , ['NumType' , 'NumForm' , 'Number' , 'Case' ])
612626 self .check_allowed_features (node , {
613- 'NumType' : ['Card' , 'Sets' ],
627+ 'NumType' : ['Card' ],
614628 'NumForm' : ['Word' ],
615629 'Number' : ['Sing' , 'Dual' , 'Plur' ],
616630 'Case' : ['Nom' , 'Gen' , 'Dat' , 'Acc' , 'Voc' , 'Loc' , 'Ins' ]
0 commit comments