# Source code for aser.extract.eventuality_extractor
import bisect
from copy import copy, deepcopy
from itertools import chain, permutations
from .discourse_parser import ConnectiveExtractor
from .discourse_parser import SyntaxTree
from .rule import ALL_EVENTUALITY_RULES
from .utils import parse_sentense_with_stanford, get_corenlp_client, get_clauses, powerset
from .utils import ANNOTATORS
from ..eventuality import Eventuality
class BaseEventualityExtractor(object):
""" Base ASER eventuality extractor to extract eventualities
"""
def __init__(self, corenlp_path="", corenlp_port=0, **kw):
"""
:param corenlp_path: corenlp path, e.g., /home/xliucr/stanford-corenlp-3.9.2
:type corenlp_path: str (default = "")
:param corenlp_port: corenlp port, e.g., 9000
:type corenlp_port: int (default = 0)
:param kw: other parameters
:type kw: Dict[str, object]
"""
self.corenlp_path = corenlp_path
self.corenlp_port = corenlp_port
self.annotators = kw.get("annotators", list(ANNOTATORS))
_, self.is_externel_corenlp = get_corenlp_client(corenlp_path=self.corenlp_path, corenlp_port=self.corenlp_port)
    def close(self):
""" Close the extractor safely
"""
if not self.is_externel_corenlp:
corenlp_client, _ = get_corenlp_client(corenlp_path=self.corenlp_path, corenlp_port=self.corenlp_port)
corenlp_client.stop()
def __del__(self):
self.close()
    def parse_text(self, text, annotators=None):
""" Parse a raw text by corenlp
:param text: a raw text
:type text: str
        :param annotators: annotators for CoreNLP; please refer to https://stanfordnlp.github.io/CoreNLP/annotators.html
:type annotators: Union[List, None] (default = None)
:return: the parsed result
:rtype: List[Dict[str, object]]
.. highlight:: python
.. code-block:: python
Input:
"My army will find your boat. In the meantime, I'm sure we could find you suitable accommodations."
Output:
[{'dependencies': [(1, 'nmod:poss', 0),
(3, 'nsubj', 1),
(3, 'aux', 2),
(3, 'dobj', 5),
(3, 'punct', 6),
(5, 'nmod:poss', 4)],
'lemmas': ['my', 'army', 'will', 'find', 'you', 'boat', '.'],
'mentions': [],
'ners': ['O', 'O', 'O', 'O', 'O', 'O', 'O'],
'parse': '(ROOT (S (NP (PRP$ My) (NN army)) (VP (MD will) (VP (VB find) (NP '
'(PRP$ your) (NN boat)))) (. .)))',
'pos_tags': ['PRP$', 'NN', 'MD', 'VB', 'PRP$', 'NN', '.'],
'text': 'My army will find your boat.',
'tokens': ['My', 'army', 'will', 'find', 'your', 'boat', '.']},
{'dependencies': [(2, 'case', 0),
(2, 'det', 1),
(6, 'nmod:in', 2),
(6, 'punct', 3),
(6, 'nsubj', 4),
(6, 'cop', 5),
(6, 'ccomp', 9),
(6, 'punct', 13),
(9, 'nsubj', 7),
(9, 'aux', 8),
(9, 'iobj', 10),
(9, 'dobj', 12),
(12, 'amod', 11)],
'lemmas': ['in',
'the',
'meantime',
',',
'I',
'be',
'sure',
'we',
'could',
'find',
'you',
'suitable',
'accommodation',
'.'],
'mentions': [],
'ners': ['O',
'O',
'O',
'O',
'O',
'O',
'O',
'O',
'O',
'O',
'O',
'O',
'O',
'O'],
'parse': '(ROOT (S (PP (IN In) (NP (DT the) (NN meantime))) (, ,) (NP (PRP '
"I)) (VP (VBP 'm) (ADJP (JJ sure) (SBAR (S (NP (PRP we)) (VP (MD "
'could) (VP (VB find) (NP (PRP you)) (NP (JJ suitable) (NNS '
'accommodations)))))))) (. .)))',
'pos_tags': ['IN',
'DT',
'NN',
',',
'PRP',
'VBP',
'JJ',
'PRP',
'MD',
'VB',
'PRP',
'JJ',
'NNS',
'.'],
'text': "In the meantime, I'm sure we could find you suitable "
'accommodations.',
'tokens': ['In',
'the',
'meantime',
',',
'I',
"'m",
'sure',
'we',
'could',
'find',
'you',
'suitable',
'accommodations',
'.']}]
"""
if annotators is None:
annotators = self.annotators
corenlp_client, _ = get_corenlp_client(
corenlp_path=self.corenlp_path, corenlp_port=self.corenlp_port, annotators=annotators
)
        parsed_result = parse_sentense_with_stanford(text, corenlp_client, annotators)
return parsed_result
    def extract_from_text(self, text, output_format="Eventuality", in_order=True, use_lemma=True, annotators=None, **kw):
""" Extract eventualities from a raw text
:param text: a raw text
:type text: str
:param output_format: which format to return, "Eventuality" or "json"
:type output_format: str (default = "Eventuality")
:param in_order: whether the returned order follows the input token order
:type in_order: bool (default = True)
:param use_lemma: whether the returned eventuality uses lemma
:type use_lemma: bool (default = True)
        :param annotators: annotators for CoreNLP; please refer to https://stanfordnlp.github.io/CoreNLP/annotators.html
:type annotators: Union[List, None] (default = None)
:param kw: other parameters
:type kw: Dict[str, object]
:return: the extracted eventualities
:rtype: Union[List[List[aser.eventuality.Eventuality]], List[List[Dict[str, object]]], List[aser.eventuality.Eventuality], List[Dict[str, object]]]
.. highlight:: python
.. code-block:: python
Input:
"My army will find your boat. In the meantime, I'm sure we could find you suitable accommodations."
Output:
[[my army will find you boat],
[i be sure, we could find you suitable accommodation]]
"""
if output_format not in ["Eventuality", "json"]:
raise NotImplementedError("Error: extract_from_text only supports Eventuality or json.")
parsed_result = self.parse_text(text, annotators)
return self.extract_from_parsed_result(parsed_result, output_format, in_order, use_lemma, **kw)
    def extract_from_parsed_result(self, parsed_result, output_format="Eventuality", in_order=True, use_lemma=True, **kw):
""" Extract eventualities from the parsed result
:param parsed_result: the parsed result returned by corenlp
:type parsed_result: List[Dict[str, object]]
:param output_format: which format to return, "Eventuality" or "json"
:type output_format: str (default = "Eventuality")
:param in_order: whether the returned order follows the input token order
:type in_order: bool (default = True)
:param use_lemma: whether the returned eventuality uses lemma
:type use_lemma: bool (default = True)
:param kw: other parameters
:type kw: Dict[str, object]
:return: the extracted eventualities
:rtype: Union[List[List[aser.eventuality.Eventuality]], List[List[Dict[str, object]]], List[aser.eventuality.Eventuality], List[Dict[str, object]]]
.. highlight:: python
.. code-block:: python
            Input: the parsed result of
                "My army will find your boat. In the meantime, I'm sure we could find you suitable accommodations."
            (the full structure of the parsed result is shown in the example of :meth:`parse_text`)
Output:
[[my army will find you boat],
[i be sure, we could find you suitable accommodation]]
"""
if output_format not in ["Eventuality", "json"]:
raise NotImplementedError("Error: extract_from_parsed_result only supports Eventuality or json.")
raise NotImplementedError
class SeedRuleEventualityExtractor(BaseEventualityExtractor):
""" ASER eventuality extractor based on rules to extract eventualities (for ASER v1.0)
"""
def __init__(self, corenlp_path="", corenlp_port=0, **kw):
"""
:param corenlp_path: corenlp path, e.g., /home/xliucr/stanford-corenlp-3.9.2
:type corenlp_path: str (default = "")
:param corenlp_port: corenlp port, e.g., 9000
:type corenlp_port: int (default = 0)
:param kw: other parameters, e.g., "skip_words" to drop sentences that contain such words
:type kw: Dict[str, object]
"""
super().__init__(corenlp_path, corenlp_port, **kw)
self.skip_words = kw.get("skip_words", set())
if not isinstance(self.skip_words, set):
self.skip_words = set(self.skip_words)
    def extract_from_parsed_result(self, parsed_result, output_format="Eventuality", in_order=True, use_lemma=True, **kw):
if output_format not in ["Eventuality", "json"]:
raise NotImplementedError("Error: extract_from_parsed_result only supports Eventuality or json.")
if not isinstance(parsed_result, (list, tuple, dict)):
raise NotImplementedError
if isinstance(parsed_result, dict):
is_single_sent = True
parsed_result = [parsed_result]
else:
is_single_sent = False
eventuality_rules = kw.get("eventuality_rules", None)
if eventuality_rules is None:
eventuality_rules = ALL_EVENTUALITY_RULES
para_eventualities = [list() for _ in range(len(parsed_result))]
for sent_parsed_result, sent_eventualities in zip(parsed_result, para_eventualities):
if self.skip_words and set(sent_parsed_result["tokens"]) & self.skip_words:
continue
seed_rule_eventualities = dict()
for rule_name in eventuality_rules:
tmp_eventualities = self._extract_eventualities_from_dependencies_with_single_rule(
sent_parsed_result, eventuality_rules[rule_name], rule_name, use_lemma
)
seed_rule_eventualities[rule_name] = tmp_eventualities
# print("rule", rule_name, tmp_eventualities)
seed_rule_eventualities = self._filter_special_case(seed_rule_eventualities)
# print("-------------")
for eventualities in seed_rule_eventualities.values():
sent_eventualities.extend(eventualities)
if in_order:
para_eventualities = [
sorted(sent_eventualities, key=lambda e: e.position) for sent_eventualities in para_eventualities
]
if output_format == "json":
para_eventualities = [
[eventuality.encode(encoding=None) for eventuality in sent_eventualities]
for sent_eventualities in para_eventualities
]
if is_single_sent:
return para_eventualities[0]
else:
return para_eventualities
else:
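            # when in_order is False, duplicate eventualities across the
            # paragraph are merged by eid via Eventuality.update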
eid2eventuality = dict()
for eventuality in chain.from_iterable(para_eventualities):
eid = eventuality.eid
if eid not in eid2eventuality:
eid2eventuality[eid] = deepcopy(eventuality)
else:
eid2eventuality[eid].update(eventuality)
if output_format == "Eventuality":
eventualities = sorted(eid2eventuality.values(), key=lambda e: e.eid)
elif output_format == "json":
eventualities = sorted(
[eventuality.encode(encoding=None) for eventuality in eid2eventuality.values()],
key=lambda e: e["eid"]
)
return eventualities
def _extract_eventualities_from_dependencies_with_single_rule(
self, sent_parsed_result, eventuality_rule, rule_name, use_lemma
):
local_eventualities = list()
verb_positions = [i for i, tag in enumerate(sent_parsed_result["pos_tags"]) if tag.startswith("VB")]
for verb_position in verb_positions:
tmp_e = self._extract_eventuality_with_fixed_target(
sent_parsed_result, eventuality_rule, verb_position, rule_name, use_lemma
)
if tmp_e is not None:
local_eventualities.append(tmp_e)
return local_eventualities
def _extract_eventuality_with_fixed_target(self, sent_parsed_result, eventuality_rule, verb_position, rule_name, use_lemma):
selected_edges = list()
selected_skeleton_edges = list()
local_dict = {'V1': verb_position}
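        # 'V1' anchors the rule at the candidate verb. Matching proceeds in
        # three passes over the rule's relation sets:
        #   positive_rules: every relation must be found (these edges form the
        #       skeleton); a single miss rejects the candidate;
        #   possible_rules: optional relations that extend the eventuality
        #       when present;
        #   negative_rules: if one matches an edge not already selected, the
        #       candidate is rejected.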
for tmp_rule_r in eventuality_rule.positive_rules:
foundmatch = False
for dep_r in sent_parsed_result["dependencies"]:
decision, local_dict = self._match_rule_r_and_dep_r(tmp_rule_r, dep_r, local_dict)
if decision:
selected_edges.append(dep_r)
selected_skeleton_edges.append(dep_r)
foundmatch = True
break
            if not foundmatch:
                # a required positive relation is missing, so the rule fails
                return None
for tmp_rule_r in eventuality_rule.possible_rules:
for dep_r in sent_parsed_result["dependencies"]:
decision, local_dict = self._match_rule_r_and_dep_r(tmp_rule_r, dep_r, local_dict)
if decision:
selected_edges.append(dep_r)
for tmp_rule_r in eventuality_rule.negative_rules:
for dep_r in sent_parsed_result["dependencies"]:
if dep_r in selected_edges:
                    # this edge was already selected by a positive or possible rule, so skip it
continue
decision, local_dict = self._match_rule_r_and_dep_r(tmp_rule_r, dep_r, local_dict)
if decision:
                    # a negative relation matched, so reject this candidate
return None
        if len(selected_edges) > 0:
            return Eventuality(
                pattern=rule_name,
                dependencies=selected_edges,
                skeleton_dependencies=selected_skeleton_edges,
                parsed_result=sent_parsed_result,
                use_lemma=use_lemma
            )
        else:
            return None
@staticmethod
def _match_rule_r_and_dep_r(rule_r, dep_r, current_dict):
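        # rule_r is a (head_var, relation_spec, dependent_var) triple; the
        # first character of relation_spec selects the match mode:
        #   '-': an edge leaving the node bound to head_var matches only if
        #        its relation is NOT one of the '/'-separated relations;
        #   '+': such an edge matches if its relation IS listed, and the
        #        dependent token is bound to dependent_var;
        #   '^': the edge is reversed (head and dependent swapped) before a
        #        plain match;
        #   otherwise: a plain match on the relation name that binds
        #        dependent_var if it is still unbound.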
        tmp_dict = dict(current_dict)
if rule_r[1][0] == '-':
tmp_relations = rule_r[1][1:].split('/')
if rule_r[0] in current_dict and dep_r[0] == current_dict[rule_r[0]]:
if dep_r[1] in tmp_relations:
return False, current_dict
else:
return True, tmp_dict
if rule_r[1][0] == '+':
tmp_relations = rule_r[1][1:].split('/')
if rule_r[0] in current_dict and dep_r[0] == current_dict[rule_r[0]]:
if dep_r[1] in tmp_relations:
tmp_dict[rule_r[2]] = dep_r[2]
return True, tmp_dict
else:
return False, current_dict
        if rule_r[1][0] == '^':
            # reverse the edge: swap head and dependent before matching
            tmp_dep_r = (dep_r[2], dep_r[1], dep_r[0])
            tmp_rule_r = (rule_r[2], rule_r[1][1:], rule_r[0])
if tmp_rule_r[1] == tmp_dep_r[1]:
if tmp_rule_r[0] in current_dict and tmp_dep_r[0] == current_dict[tmp_rule_r[0]]:
if tmp_rule_r[2] not in tmp_dict:
tmp_dict[tmp_rule_r[2]] = tmp_dep_r[2]
return True, tmp_dict
else:
tmp_dep_r = dep_r
tmp_rule_r = rule_r
if tmp_rule_r[1] == tmp_dep_r[1]:
if tmp_rule_r[0] in current_dict and tmp_dep_r[0] == current_dict[tmp_rule_r[0]]:
if tmp_rule_r[2] not in tmp_dict:
tmp_dict[tmp_rule_r[2]] = tmp_dep_r[2]
return True, tmp_dict
return False, current_dict
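    # A worked example of _match_rule_r_and_dep_r (the rule triple below is a
    # hypothetical illustration, not one taken from ALL_EVENTUALITY_RULES):
    # with current_dict = {'V1': 3}, rule_r = ('V1', '+nsubj', 'S1'), and
    # dep_r = (3, 'nsubj', 1), the '+' branch matches ('nsubj' is listed and
    # token 3 is bound to 'V1'), so the call returns (True, {'V1': 3, 'S1': 1}).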
@staticmethod
def _filter_special_case(extracted_eventualities):
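        # Drop eventualities whose words contain "|", then re-label patterns
        # whose seed rule over-matches: e.g., the s-v-v rule catches both a
        # verbal xcomp ("he wants to leave", stays s-v-v) and an adjectival
        # xcomp ("he seems happy", re-labeled as s-v-a).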
for k, v in extracted_eventualities.items():
extracted_eventualities[k] = [e for e in v if "|" not in e.words]
extracted_eventualities['s-v-a'] = []
extracted_eventualities['s-be-o'] = []
extracted_eventualities['s-v-be-o'] = []
extracted_eventualities['s-v-o-be-o'] = []
if len(extracted_eventualities['s-v-v']) > 0:
tmp_s_v_v = list()
tmp_s_v_a = list()
for e in extracted_eventualities['s-v-v']:
for edge in e.dependencies:
if edge[1] == 'xcomp':
if 'VB' in edge[2][2]:
tmp_s_v_v.append(e)
if 'JJ' in edge[2][2]:
e.pattern = 's-v-a'
tmp_s_v_a.append(e)
break
extracted_eventualities['s-v-v'] = tmp_s_v_v
extracted_eventualities['s-v-a'] = tmp_s_v_a
if len(extracted_eventualities['s-v-be-a']) > 0:
tmp_s_v_be_a = list()
tmp_s_v_be_o = list()
for e in extracted_eventualities['s-v-be-a']:
for edge in e.dependencies:
if edge[1] == 'xcomp':
if 'JJ' in edge[2][2]:
tmp_s_v_be_a.append(e)
if 'NN' in edge[2][2]:
e.pattern = 's-v-be-o'
tmp_s_v_be_o.append(e)
break
extracted_eventualities['s-v-be-a'] = tmp_s_v_be_a
extracted_eventualities['s-v-be-o'] = tmp_s_v_be_o
if len(extracted_eventualities['s-be-a']) > 0:
tmp_s_be_a = list()
tmp_s_be_o = list()
for e in extracted_eventualities['s-be-a']:
for edge in e.dependencies:
if edge[1] == 'cop':
if 'JJ' in edge[0][2]:
tmp_s_be_a.append(e)
if 'NN' in edge[0][2]:
e.pattern = 's-be-o'
tmp_s_be_o.append(e)
break
extracted_eventualities['s-be-a'] = tmp_s_be_a
extracted_eventualities['s-be-o'] = tmp_s_be_o
if len(extracted_eventualities['s-v-o-be-a']) > 0:
tmp_s_v_o_be_a = list()
tmp_s_v_o_be_o = list()
for e in extracted_eventualities['s-v-o-be-a']:
for edge in e.dependencies:
if edge[1] == 'xcomp':
if 'JJ' in edge[2][2]:
tmp_s_v_o_be_a.append(e)
if 'NN' in edge[2][2]:
e.pattern = 's-v-o-be-o'
tmp_s_v_o_be_o.append(e)
break
extracted_eventualities['s-v-o-be-a'] = tmp_s_v_o_be_a
extracted_eventualities['s-v-o-be-o'] = tmp_s_v_o_be_o
if len(extracted_eventualities['s-v']) > 0:
tmp_s_v = list()
for e in extracted_eventualities['s-v']:
for edge in e.dependencies:
if edge[1] == 'nsubj':
if edge[0][0] > edge[2][0] or edge[0][1] == 'be':
tmp_s_v.append(e)
extracted_eventualities['s-v'] = tmp_s_v
return extracted_eventualities
class DiscourseEventualityExtractor(BaseEventualityExtractor):
""" ASER eventuality extractor based on constituency analysis to extract eventualities (for ASER v2.0)
"""
def __init__(self, corenlp_path="", corenlp_port=0, **kw):
super().__init__(corenlp_path, corenlp_port, **kw)
self.seed_rule_eventuality_extractor = SeedRuleEventualityExtractor(**kw)
self.conn_extractor = ConnectiveExtractor(**kw)
    def extract_from_parsed_result(self, parsed_result, output_format="Eventuality", in_order=True, use_lemma=True, **kw):
if output_format not in ["Eventuality", "json"]:
raise NotImplementedError("Error: extract_from_parsed_result only supports Eventuality or json.")
if not isinstance(parsed_result, (list, tuple, dict)):
raise NotImplementedError
if isinstance(parsed_result, dict):
is_single_sent = True
parsed_result = [parsed_result]
else:
is_single_sent = False
syntax_tree_cache = kw.get("syntax_tree_cache", dict())
para_eventualities = [list() for _ in range(len(parsed_result))]
para_clauses = self._extract_clauses(parsed_result, syntax_tree_cache)
for sent_parsed_result, sent_clauses, sent_eventualities in zip(
parsed_result, para_clauses, para_eventualities
):
for clause in sent_clauses:
len_clause = len(clause)
idx_mapping = {j: i for i, j in enumerate(clause)}
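                # e.g., a clause covering original token indices (4, 7, 8, 9)
                # maps 4->0, 7->1, 8->2, 9->3, so the dependencies, tokens, and
                # tags sliced below are re-indexed consistently from zero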
indices_set = set(clause)
clause_parsed_result = {
"text": "",
"dependencies": [(idx_mapping[dep[0]], dep[1], idx_mapping[dep[2]]) for dep in sent_parsed_result["dependencies"] \
if dep[0] in indices_set and dep[2] in indices_set],
"tokens": [sent_parsed_result["tokens"][idx] for idx in clause],
"pos_tags": [sent_parsed_result["pos_tags"][idx] for idx in clause],
"lemmas": [sent_parsed_result["lemmas"][idx] for idx in clause]}
if "ners" in sent_parsed_result:
clause_parsed_result["ners"] = [sent_parsed_result["ners"][idx] for idx in clause]
if "mentions" in sent_parsed_result:
clause_parsed_result["mentions"] = list()
for mention in sent_parsed_result["mentions"]:
start_idx = bisect.bisect_left(clause, mention["start"])
if not (start_idx < len_clause and clause[start_idx] == mention["start"]):
continue
end_idx = bisect.bisect_left(clause, mention["end"] - 1)
if not (end_idx < len_clause and clause[end_idx] == mention["end"] - 1):
continue
mention = copy(mention)
mention["start"] = start_idx
mention["end"] = end_idx + 1
clause_parsed_result["mentions"].append(mention)
eventualities = self.seed_rule_eventuality_extractor.extract_from_parsed_result(
clause_parsed_result, output_format="Eventuality", in_order=True, use_lemma=use_lemma, **kw
)
len_existed_eventualities = len(sent_eventualities)
for e in eventualities:
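                    # map clause-local token positions back to positions in the
                    # original sentence, then refresh the eid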
for k, v in e.raw_sent_mapping.items():
e.raw_sent_mapping[k] = clause[v]
e.eid = Eventuality.generate_eid(e)
existed_eventuality = False
for e_idx in range(len_existed_eventualities):
if sent_eventualities[e_idx].eid == e.eid and \
sent_eventualities[e_idx].raw_sent_mapping == e.raw_sent_mapping:
existed_eventuality = True
break
if not existed_eventuality:
sent_eventualities.append(e)
if in_order:
para_eventualities = [
sorted(sent_eventualities, key=lambda e: e.position) for sent_eventualities in para_eventualities
]
if output_format == "json":
para_eventualities = [
[eventuality.encode(encoding=None) for eventuality in sent_eventualities]
for sent_eventualities in para_eventualities
]
if is_single_sent:
return para_eventualities[0]
else:
return para_eventualities
else:
eid2eventuality = dict()
for eventuality in chain.from_iterable(para_eventualities):
eid = eventuality.eid
if eid not in eid2eventuality:
eid2eventuality[eid] = deepcopy(eventuality)
else:
eid2eventuality[eid].update(eventuality)
if output_format == "Eventuality":
eventualities = sorted(eid2eventuality.values(), key=lambda e: e.eid)
elif output_format == "json":
eventualities = sorted(
[eventuality.encode(encoding=None) for eventuality in eid2eventuality.values()],
key=lambda e: e["eid"]
)
return eventualities
def _extract_clauses(self, parsed_result, syntax_tree_cache):
para_arguments = [set() for _ in range(len(parsed_result))]
connectives = self.conn_extractor.extract(parsed_result, syntax_tree_cache)
para_connectives = [set() for _ in range(len(parsed_result))]
for connective in connectives:
sent_idx, indices = connective["sent_idx"], tuple(connective["indices"])
para_connectives[sent_idx].add(indices)
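        # within each sentence, the connective token indices act as separators:
        # get_clauses splits the sentence at these indices to enumerate
        # candidate clauses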
for sent_idx, sent_parsed_result in enumerate(parsed_result):
sent_connectives = para_connectives[sent_idx]
sent_arguments = para_arguments[sent_idx]
if sent_idx in syntax_tree_cache:
syntax_tree = syntax_tree_cache[sent_idx]
else:
syntax_tree = syntax_tree_cache[sent_idx] = SyntaxTree(sent_parsed_result["parse"])
            # enumerating every subset of the connectives yields more candidate
            # clauses but is much slower:
            # for indices in powerset(sent_connectives):
            #     indices = set(chain.from_iterable(indices))
            #     sent_arguments.update(get_clauses(sent_parsed_result, syntax_tree, sep_indices=indices))
sent_arguments.update(
get_clauses(sent_parsed_result, syntax_tree, sep_indices=set(chain.from_iterable(sent_connectives)))
)
# print("'clause indices':", para_arguments)
return para_arguments