Skip to content

Commit 0bf7f7a

Browse files
committed
Allow gender with set numerals in Czech.
1 parent 1d7152d commit 0bf7f7a

File tree

1 file changed

+17
-3
lines changed

1 file changed

+17
-3
lines changed

udapi/block/ud/cs/markfeatsbugs.py

Lines changed: 17 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -548,13 +548,27 @@ def process_node(self, node):
548548
'NumForm': ['Digit', 'Roman']
549549
})
550550
else:
551+
if node.feats['NumType'] == 'Sets':
552+
# 'jedny', 'dvoje', 'troje', 'čtvery'
553+
# Number should perhaps be only Plur because the counted noun will be Plur.
554+
# Gender is not annotated in PDT but there are different forms ('jedni' vs. 'jedny',
555+
# and in Old Czech also 'dvoji' vs. 'dvoje'), so we should allow Gender (and Animacy).
556+
self.check_required_features(node, ['NumType', 'NumForm', 'Number', 'Case'])
557+
self.check_allowed_features(node, {
558+
'NumType': ['Sets'],
559+
'NumForm': ['Word'],
560+
'Gender': ['Masc', 'Fem', 'Neut'],
561+
'Animacy': ['Anim', 'Inan'],
562+
'Number': ['Sing', 'Dual', 'Plur'],
563+
'Case': ['Nom', 'Gen', 'Dat', 'Acc', 'Voc', 'Loc', 'Ins']
564+
})
551565
# 'jeden' has Gender, Animacy, Number, Case: jeden, jedna, jedno, jednoho, jednomu, jednom, jedním, jedné, jednu, jednou, jedni, jedny, jedněch, jedněm, jedněmi.
552566
# 'dva', 'oba' have Gender, Number=Dual (Plur in modern Czech), Case: dva, dvě, dvou, dvěma.
553567
# 'tři', 'čtyři' have Number=Plur, Case: tři, třech, třem, třemi.
554568
# 'pět' and more have Number=Plur, Case: pět, pěti.
555569
# 'půl' has no Number and Case, although it behaves syntactically similarly to 'pět' (but genitive is still 'půl', not '*půli').
556570
# 'sto', 'tisíc', 'milión', 'miliarda' etc. have Gender (+ possibly Animacy) and Number (depending on their form).
557-
if node.lemma == 'jeden':
571+
elif node.lemma == 'jeden':
558572
self.check_required_features(node, ['NumType', 'NumForm', 'Number', 'Case'])
559573
self.check_allowed_features(node, {
560574
'NumType': ['Card'],
@@ -594,7 +608,7 @@ def process_node(self, node):
594608
elif re.match(r'^(sto|tisíc|.+ili[oó]n|.+iliarda)$', node.lemma):
595609
self.check_required_features(node, ['NumType', 'NumForm', 'Number', 'Case'])
596610
self.check_allowed_features(node, {
597-
'NumType': ['Card', 'Sets'],
611+
'NumType': ['Card'],
598612
'NumForm': ['Word'],
599613
'Gender': ['Masc', 'Fem', 'Neut'],
600614
'Animacy': ['Anim', 'Inan'],
@@ -610,7 +624,7 @@ def process_node(self, node):
610624
# On the other hand, we may want to allow Dual for "stě".
611625
self.check_required_features(node, ['NumType', 'NumForm', 'Number', 'Case'])
612626
self.check_allowed_features(node, {
613-
'NumType': ['Card', 'Sets'],
627+
'NumType': ['Card'],
614628
'NumForm': ['Word'],
615629
'Number': ['Sing', 'Dual', 'Plur'],
616630
'Case': ['Nom', 'Gen', 'Dat', 'Acc', 'Voc', 'Loc', 'Ins']

0 commit comments

Comments
 (0)