Skip to content

Commit 3e25850

Browse files
Extend Romance block to support Spanish
1 parent 5a7dd36 commit 3e25850

File tree

1 file changed

+72
-13
lines changed

1 file changed

+72
-13
lines changed

udapi/block/msf/romance/romance.py

Lines changed: 72 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,9 @@
22
import udapi.block.msf.phrase
33
from enum import Enum
44

5+
AUXES_HAVE = ['ter', 'haber']
6+
AUXES_BE = ['estar']
7+
58
class Aspect(str, Enum):
69
IMP = 'Imp'
710
IMPPROG = 'ImpProg'
@@ -41,7 +44,7 @@ def process_node(self, node):
4144
self.process_copulas(node,cop,auxes,refl,expl)
4245
return
4346

44-
if node.upos == 'VERB':
47+
if node.upos == 'VERB': #TODO maybe add 'or node.feats['VerbForm'] == 'Part'?
4548
auxes = [x for x in node.children if x.udeprel == 'aux']
4649
aux_pass = [x for x in node.children if x.deprel == 'aux:pass']
4750
auxes_without_pass = [x for x in node.children if x.udeprel == 'aux' and x.deprel != 'aux:pass']
@@ -66,29 +69,48 @@ def process_node(self, node):
6669
phrase_ords = [node.ord] + [r.ord for r in refl]
6770
phrase_ords.sort()
6871

72+
# Portuguese
6973
# presente -> PhraseTense=Pres, PhraseAspect=''
7074
# Futuro do presente -> PhraseTense=Fut, PhraseAspect=''
75+
76+
# Spanish
77+
# presente -> PhraseTense=Pres, PhraseAspect=''
78+
# futuro simple -> PhraseTense=Fut, PhraseAspect=''
7179
aspect = ''
7280
tense = node.feats['Tense']
7381

7482
if node.feats['Mood'] == 'Ind':
75-
83+
84+
# Portuguese
7685
# pretérito imperfeito -> PhraseTense=Past, PhraseAspect=Imp
86+
87+
# Spanish
88+
# pretérito imperfecto -> PhraseTense=Past, PhraseAspect=Imp
7789
if node.feats['Tense'] == 'Imp':
7890
tense=Tense.PAST.value
7991
aspect=Aspect.IMP.value
8092

93+
# Portuguese
8194
# pretérito perfeito -> PhraseTense=Past, PhraseAspect=Perf
95+
96+
# Spanish
97+
# pretérito perfecto -> PhraseTense=Past, PhraseAspect=Perf
8298
if node.feats['Tense'] == 'Past':
8399
aspect=Aspect.PERF.value
84100

101+
# Portuguese
85102
# pretérito mais que perfeito simples -> PhraseTense=Past, PhraseAspect=Pqp
86103
if node.feats['Tense'] == 'Pqp':
87104
tense=Tense.PAST.value
88105
aspect=Aspect.PQP.value
89106

107+
# Portuguese
90108
# subjunctive presente -> PhraseTense=Pres, PhraseAspect=''
91109
# subjunctive futuro -> PhraseTense=Fut, PhraseAspect=''
110+
111+
# Spanish
112+
# subjunctive presente -> PhraseTense=Pres, PhraseAspect=''
113+
# subjunctive futuro -> PhraseTense=Fut, PhraseAspect='' TODO not annotated in treebanks?
92114
if node.feats['Mood'] == 'Sub':
93115

94116
if node.feats['Tense'] == 'Past':
@@ -99,7 +121,11 @@ def process_node(self, node):
99121
tense=Tense.PAST.value
100122
aspect=Aspect.IMP.value
101123

124+
# Portuguese
102125
# Futuro do pretérito (cnd) -> PhraseTense=Pres, PhraseAspect='', PhraseMood=Cnd
126+
127+
# Spanish
128+
# pospretérito (cnd) -> PhraseTense=Pres, PhraseAspect='', PhraseMood=Cnd
103129
if node.feats['Mood'] == 'Cnd':
104130
aspect=''
105131
tense=Tense.PRES.value
@@ -146,7 +172,6 @@ def process_node(self, node):
146172
else:
147173
self.process_periphrastic_verb_forms(aux_pass[0], auxes_without_pass, refl, auxes, node)
148174

149-
150175
def process_periphrastic_verb_forms(self, node, auxes, refl, all_auxes, head_node):
151176
"""
152177
Parameters
@@ -166,19 +191,24 @@ def process_periphrastic_verb_forms(self, node, auxes, refl, all_auxes, head_nod
166191

167192
if len(auxes) == 1:
168193
# Cnd
169-
if ((auxes[0].lemma == 'ter' and node.feats['VerbForm'] == 'Part') or (auxes[0].lemma == 'estar' and node.feats['VerbForm'] == 'Ger')) and auxes[0].feats['Mood'] == 'Cnd':
194+
if ((auxes[0].lemma in AUXES_HAVE and node.feats['VerbForm'] == 'Part') or (auxes[0].lemma in AUXES_BE and node.feats['VerbForm'] == 'Ger')) and auxes[0].feats['Mood'] == 'Cnd':
170195
phrase_ords = [head_node.ord] + [x.ord for x in all_auxes] + [r.ord for r in refl] + [r.ord for r in refl]
171196
phrase_ords.sort()
172197

198+
# Portuguese
173199
# aux estar cond + gerund -> PhraseTense=Pres, PhraseAspect=Prog, PhraseMood=Cnd
174200
if auxes[0].lemma == 'estar':
175201
tense=Tense.PRES.value
176202
aspect=Aspect.PROG.value
177203

178-
# Futuro do pretérito composto -> PhraseTense=Past, PhraseAspect=Perf, PhraseMood=Cnd
204+
# Portuguese
205+
# Futuro do pretérito composto -> PhraseTense=Past, PhraseAspect='', PhraseMood=Cnd
206+
207+
# Spanish
208+
# Antepospretérito -> PhraseTense=Past, PhraseAspect='', PhraseMood=Cnd
179209
else:
180210
tense=Tense.PAST.value
181-
aspect=Aspect.PERF.value
211+
aspect=''
182212

183213
self.write_node_info(head_node,
184214
tense=tense,
@@ -193,26 +223,30 @@ def process_periphrastic_verb_forms(self, node, auxes, refl, all_auxes, head_nod
193223
return
194224

195225
# Auxiliary 'estar' followed by a gerund
196-
if auxes[0].lemma == 'estar' and node.feats['VerbForm'] == 'Ger':
226+
if auxes[0].lemma in AUXES_BE and node.feats['VerbForm'] == 'Ger':
197227
phrase_ords = [head_node.ord] + [x.ord for x in all_auxes] + [r.ord for r in refl]
198228
phrase_ords.sort()
199229

230+
# Portuguese + Spanish
200231
# pretérito imperfeito (aux estar) -> PhraseTense=Past, PhraseAspect=ImpProg
201232
# subjunctive pretérito imperfeito (aux estar) -> PhraseTense=Past, PhraseAspect=ImpProg, PhraseMood=Sub
202233
if auxes[0].feats['Tense'] == 'Imp':
203234
tense=Tense.PAST.value
204235
aspect=Aspect.IMPPROG.value
205236

237+
# Portuguese + Spanish
206238
# pretérito perfeito (aux estar) -> PhraseTense=Past, PhraseAspect=PerfProg
207239
elif auxes[0].feats['Tense'] == 'Past':
208240
tense=Tense.PAST.value
209241
aspect=Aspect.PERFPROG.value
210242

243+
# Portuguese + Spanish
211244
# conditional (aux estar) -> PhraseTense=Pres, PhraseAspect=Prog, PhraseMood=Cnd
212245
elif auxes[0].feats['Mood'] == 'Cnd':
213246
tense=Tense.PRES.value
214247
aspect=Aspect.PROG.value
215248

249+
# Portuguese + Spanish
216250
# presente (aux estar) -> PhraseTense=Pres, PhraseAspect=Prog
217251
# futuro do presente (aux estar) -> PhraseTense=Fut, PhraseAspect=Prog
218252
# subjunctive presente (aux estar) -> PhraseTense=Pres, PhraseAspect=Prog, PhraseMood=Sub
@@ -232,22 +266,38 @@ def process_periphrastic_verb_forms(self, node, auxes, refl, all_auxes, head_nod
232266
expl=expl,
233267
ords=phrase_ords)
234268

235-
# Auxiliary 'ter' followed by a participle
236-
if auxes[0].lemma == 'ter' and node.feats['VerbForm'] == 'Part':
269+
# Auxiliary 'ter' / 'haber' followed by a participle
270+
if auxes[0].lemma in AUXES_HAVE and node.feats['VerbForm'] == 'Part':
237271
phrase_ords = [head_node.ord] + [x.ord for x in all_auxes] + [r.ord for r in refl]
238272
phrase_ords.sort()
239273

274+
# Portuguese
240275
# futuro do presente composto (aux ter) -> PhraseTense=Fut, PhraseAspect=Perf
276+
277+
# Spanish
278+
# Futuro compuesto antefuturo -> PhraseTense=Fut, PhraseAspect=Perf
241279
aspect=Aspect.PERF.value
242280
tense=auxes[0].feats['Tense']
243281

282+
# Portuguese
244283
# pretérito perfeito composto (aux ter) -> PhraseTense=PastPres, PhraseAspect=Perf
245284
# subjunctive pretérito perfeito composto (aux ter) -> PhraseTense=PastPres, PhraseAspect=Perf, PhraseMood=Sub
285+
246286
if auxes[0].feats['Tense'] == 'Pres':
247-
tense=Tense.PASTPRES.value
248287

249-
# pretérito mais que perfeito composto (aux ter/haver) -> PhraseTense=Past, PhraseAspect=Pqp
288+
# Spanish
289+
# Pretérito perfecto compuesto ante presente -> PhraseTense=Past, PhraseAspect=Perf
290+
if auxes[0].lemma == 'haber' and auxes[0].feats['Mood'] != 'Sub':
291+
tense = Tense.PAST.value
292+
else:
293+
tense=Tense.PASTPRES.value
294+
295+
# Portuguese
296+
# pretérito mais que perfeito composto (aux ter) -> PhraseTense=Past, PhraseAspect=Pqp
250297
# subjunctive pretérito mais-que-perfeito composto (aux ter) -> PhraseTense=Past, PhraseAspect=Pqp, PhraseMood=Sub
298+
299+
# Spanish
300+
# pretérito pluscuamperfecto -> PhraseTense=Past, PhraseAspect=Pqp
251301
elif auxes[0].feats['Tense'] in ['Imp', 'Past']: # TODO prej neni v Past, jenom Imp
252302
tense=Tense.PAST.value
253303
aspect=Aspect.PQP.value
@@ -263,6 +313,8 @@ def process_periphrastic_verb_forms(self, node, auxes, refl, all_auxes, head_nod
263313
expl=expl,
264314
ords=phrase_ords)
265315

316+
# Portuguese
317+
# pretérito mais que perfeito composto (aux haver) -> PhraseTense=Past, PhraseAspect=Perf
266318
if auxes[0].lemma == 'haver' and auxes[0].feats['Tense'] == 'Imp' and node.feats['VerbForm'] == 'Part':
267319
phrase_ords = [head_node.ord] + [x.ord for x in all_auxes] + [r.ord for r in refl]
268320
phrase_ords.sort()
@@ -303,6 +355,7 @@ def process_periphrastic_verb_forms(self, node, auxes, refl, all_auxes, head_nod
303355

304356

305357
# auxiliary 'ir' followed by infinitive
358+
# TODO solve these verb forms for Spanish (VERB 'ir' + ADP 'a' + infinitive)
306359
if auxes[0].lemma == 'ir' and node.feats['VerbForm'] == 'Inf':
307360
phrase_ords = [head_node.ord] + [x.ord for x in all_auxes] + [r.ord for r in refl]
308361
phrase_ords.sort()
@@ -368,7 +421,11 @@ def process_periphrastic_verb_forms(self, node, auxes, refl, all_auxes, head_nod
368421
ords=phrase_ords)
369422

370423
elif len(auxes) == 2:
424+
# Portuguese
371425
# auxiliary 'ir' followed by auxiliary 'estar' in infinitive and a gerund
426+
427+
# TODO Spanish
428+
# VERB 'ir' + ADP 'a' + AUX 'estar'.Inf + gerund
372429
if auxes[0].lemma == 'ir' and auxes[1].lemma == 'estar' and node.feats['VerbForm'] == 'Ger':
373430
phrase_ords = [head_node.ord] + [x.ord for x in all_auxes] + [r.ord for r in refl]
374431
phrase_ords.sort()
@@ -431,8 +488,8 @@ def process_periphrastic_verb_forms(self, node, auxes, refl, all_auxes, head_nod
431488

432489

433490

434-
# Cnd (only ter), Sub and Past,Pres,Fut tenses: 2 auxes - ter + estar
435-
if auxes[0].lemma in ['ter', 'haver'] and auxes[1].lemma == 'estar' and node.feats['VerbForm'] == 'Ger':
491+
# Cnd (only ter/haber), Sub and Past,Pres,Fut tenses: 2 auxes - ter/haber + estar
492+
if auxes[0].lemma in AUXES_HAVE and auxes[1].lemma == 'estar' and node.feats['VerbForm'] == 'Ger':
436493
phrase_ords = [head_node.ord] + [x.ord for x in all_auxes] + [r.ord for r in refl]
437494
phrase_ords.sort()
438495

@@ -473,6 +530,8 @@ def process_periphrastic_verb_forms(self, node, auxes, refl, all_auxes, head_nod
473530
return
474531

475532
def process_copulas(self, node, cop, auxes, refl, expl):
533+
534+
aspect = ''
476535

477536
if not auxes:
478537
tense = cop[0].feats['Tense']

0 commit comments

Comments
 (0)