Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add PLP Model #5

Open
wants to merge 5 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion algophon/models/D2L/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from algophon.models.D2L.discrepancy import Discrepancy
from algophon.models.D2L.tier import Tier
from algophon.models.D2L.rule import Rule
from algophon.models.D2L.rule import D2LRule
from algophon.models.D2L.d2l import D2L
10 changes: 5 additions & 5 deletions algophon/models/D2L/d2l.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

from algophon import SegInv, SegStr, NatClass
from algophon.symbols import UNDERSPECIFIED, BOUNDARIES
from algophon.models.D2L import Discrepancy, Rule, Tier
from algophon.models.D2L import Discrepancy, D2LRule, Tier
from algophon.utils import tsp

class D2L:
Expand Down Expand Up @@ -187,11 +187,11 @@ def _train_setup(self, pairs: Iterable) -> set:
if self._discrepancy is None: # init discrepancy if it does not exist
self._discrepancy = Discrepancy(feat_diff)
# tabulate this pair's contribution to the discrepancy
self._discrepancy.tabulate(ur=ur, i=i, ur_seg=ur_seg, sr_seg=sr_seg)
self._discrepancy.tabulate(ur=ur, ur_seg=ur_seg, sr_seg=sr_seg)

return setup_pairs

def build_rule(self, pairs: set, delset: set=set(), tier=None, harmony: bool=True, discrepancy: Union[None, Discrepancy]=None) -> Rule:
def build_rule(self, pairs: set, delset: set=set(), tier=None, harmony: bool=True, discrepancy: Union[None, Discrepancy]=None) -> D2LRule:
'''
Builds a rule recursively.

Expand All @@ -208,13 +208,13 @@ def build_rule(self, pairs: set, delset: set=set(), tier=None, harmony: bool=Tru

lctxts, rctxts = self._get_tier_adj_contexts(discrepancy=discrepancy, tier=tier) # compute ctxts
# build left rule
left_rule = Rule(seginv=self.seginv, target=target, features=discrepancy.feature_diff, left_ctxts=lctxts, tier=tier, harmony=harmony)
left_rule = D2LRule(seginv=self.seginv, target=target, features=discrepancy.feature_diff, left_ctxts=lctxts, tier=tier, harmony=harmony)
left_underextensions = left_rule.underextension_SRs(pairs=pairs)
if len(left_underextensions) > 0:
left_default_sr = sorted(left_underextensions.items(), reverse=True, key=lambda it: it[-1])[0][0]
left_rule.set_defaults(dict((feat, left_default_sr.features[feat]) for feat in discrepancy.feature_diff))
# build right rule
right_rule = Rule(seginv=self.seginv, target=target, features=discrepancy.feature_diff, right_ctxts=rctxts, tier=tier, harmony=harmony)
right_rule = D2LRule(seginv=self.seginv, target=target, features=discrepancy.feature_diff, right_ctxts=rctxts, tier=tier, harmony=harmony)
right_underextensions = right_rule.underextension_SRs(pairs=pairs)
if len(right_underextensions) > 0:
right_default_sr = sorted(right_underextensions.items(), reverse=True, key=lambda it: it[-1])[0][0]
Expand Down
26 changes: 5 additions & 21 deletions algophon/models/D2L/discrepancy.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,34 +10,18 @@ def __init__(self, feature_diff: set) -> object:
:feature_diff: the features that differ between alternating ur_seg ~ sr_seg pairs
'''
self.alternations = set() # stores the (ur_seg ~ sr_seg) alternations corresponding to the discrepancy
self.instances = set() # stores each instance of the discrepancy
self.URs = set() # stores each UR with a discrepancy
self.feature_diff = feature_diff

def __contains__(self, item: tuple) -> bool:
return item in self.alternations

def __str__(self) -> str:
return self.feature_diff.__str__()

def __repr__(self) -> str:
return self.__str__()

def tabulate(self, ur: SegStr, i: int, ur_seg: Seg, sr_seg: Seg) -> None:
def tabulate(self, ur: SegStr, ur_seg: Seg, sr_seg: Seg) -> None:
'''
:ur: the UR exhibiting the discrepancy
:i: the index where the discrepancy occurs
:ur_seg:, :sr_seg: the ur_seg ~ sr_seg alternation

:return: None
'''
self.alternations.add((ur_seg, sr_seg)) # update alternations
self.instances.add((ur, i, sr_seg)) # update instances

def get_alternating(self) -> set:
'''
:return: a set of all the underling and surface Seg objects that are involved in the alternation
'''
return set(it[0] for it in self.alternations).union(it[1] for it in self.alternations)
self.URs.add(ur) # update URs set

def get_alternating_UR_segs(self) -> set:
'''
Expand All @@ -47,6 +31,6 @@ def get_alternating_UR_segs(self) -> set:

def get_URs(self) -> set:
'''
:return: a set of all the URs that are involved in the alternation
:return: the set of all URs that are involved in the alternation
'''
return set(it[0] for it in self.instances)
return self.URs
64 changes: 5 additions & 59 deletions algophon/models/D2L/rule.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,10 @@
from algophon import Seg, SegInv, NatClass, SegStr
from algophon.symbols import FUNCTION_COMPOSITION, LWB, RWB, UNK, UNDERSPECIFIED
from algophon.models.D2L import Tier
from algophon.models import Rule

class Rule:
def __init__(self,
class D2LRule(Rule):
def __init__(self,
seginv: SegInv,
target: set,
features: set,
Expand All @@ -21,9 +22,9 @@ def __init__(self,
:target: a set of target (alternating) segments
:features: a set of features that alternate
:defaults: the values to use for :features: if no ctxt matches for a particular target.
:left_ctxts: (optional; default None) a set of right-adj (to target) segments that trigger rule application
:left_ctxts: (optional; default None) a set of left-adj (to target) segments that trigger rule application
- Can be a set of specific segments or a NatClass object
:right_ctxts: (optional; default None) a set of left-adj (to target) segments that trigger a rule application
:right_ctxts: (optional; default None) a set of right-adj (to target) segments that trigger a rule application
- Can be a set of specific segments or a NatClass object
:tier: (optional; default None) a Tier object to apply the rule over
:harmony: (optional; default True) specifies whether the rule enforces harmony or disharmony
Expand Down Expand Up @@ -57,61 +58,6 @@ def __str__(self) -> str:
else:
return f'{adj_str} / __ {self.right_ctxts}{tier_str}'

def __repr__(self) -> str:
return self.__str__()

def produce(self, ur: Union[str, SegStr]) -> SegStr:
'''
Produces a SR for and input UR

:ur: the UR to produce an SR for. Can be:
- space-separated str of IPA symbols
- SegStr object

:return: a SegStr representing the predicted SR
'''
if isinstance(ur, str): # convert str ur to SegStr
ur = SegStr(ur, seginv=self.seginv)
new_segs = list(ur._segs) # init new seg list
# apply predictions
for idx, seg in self._predictions(segstr=ur):
new_segs[idx] = seg
return SegStr(segs=new_segs, seginv=self.seginv) # return SegStr object

# calling a Rule object amounts to calling its produce() method
__call__ = produce

def accuracy(self, pairs: Iterable) -> float:
'''
:pairs: an iterable of (UR, SR) pairs to compute accuracy for
- Computed over unique pairs

:return: the accuracy of the rule's predictions of the :pairs:
'''
n, m = self.tsp_stats(pairs=pairs)
return m / n if n > 0 else 0.0

def tsp_stats(self, pairs: Iterable) -> tuple[int, int]:
'''
Computes n and m for the TSP w.r.t a set of pairs

:pairs: an iterable of (UR, SR) pairs to compute the TSP stats for
- Computed over unique pairs

:return: n and m
'''
n, m = 0, 0
for ur, sr in set(pairs):
if isinstance(ur, str):
ur = SegStr(ur, seginv=self.seginv)
if isinstance(sr, str):
sr = SegStr(sr, seginv=self.seginv)
for idx, pred_sr_seg in self._predictions(ur):
n += 1
if sr[idx] == pred_sr_seg:
m += 1
return n, m

def _predictions(self, segstr: SegStr) -> list:
'''
:segstr: a SegStr to apply the rule to
Expand Down
1 change: 1 addition & 0 deletions algophon/models/PLP/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
__pycache__/
4 changes: 4 additions & 0 deletions algophon/models/PLP/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
from algophon.models.PLP.discrepancy import Discrepancy
from algophon.models.PLP.rule import PLPRule
from algophon.models.PLP.grammar import Grammar
from algophon.models.PLP.plp import PLP
8 changes: 8 additions & 0 deletions algophon/models/PLP/discrepancy.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
class Discrepancy:
    '''
    A class for representing a discrepancy—i.e., a difference between URs and SRs.

    Currently a placeholder: the constructor takes no arguments and stores no state.
    '''

    def __init__(self) -> None:
        '''
        Creates an empty Discrepancy.

        :return: None
        '''
        # TODO: store the information that characterizes a discrepancy
        pass
4 changes: 4 additions & 0 deletions algophon/models/PLP/grammar.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
class Grammar:
    '''
    Placeholder for the grammar used by the PLP model.

    Not yet implemented: the constructor takes no arguments and stores no state.
    '''

    def __init__(self) -> None:
        '''
        Creates an empty Grammar.

        :return: None
        '''
        # TODO: define what a Grammar holds
        pass
4 changes: 4 additions & 0 deletions algophon/models/PLP/plp.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
class PLP:
    '''
    Placeholder for the PLP model.

    Not yet implemented: the constructor takes no arguments and stores no state.
    '''

    def __init__(self) -> None:
        '''
        Creates an empty PLP model.

        :return: None
        '''
        # TODO: implement the model
        pass
46 changes: 46 additions & 0 deletions algophon/models/PLP/rule.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
from typing import Union

from algophon import SegStr, NatClass
from algophon.models import Rule

class PLPRule(Rule):
    '''
    Implements an SPE-style rule A -> B / C __ D

    Work in progress: only the constructor stores state; _predictions() and __str__()
    are not implemented yet and raise NotImplementedError.
    '''

    def __init__(self,
                 seginv,
                 target: set,
                 left_ctxt: Union[None, list[Union[set, NatClass]]]=None,
                 right_ctxt: Union[None, list[Union[set, NatClass]]]=None) -> None:
        '''
        :seginv: a SegInv object
        :target: a set of target (alternating) segments
        :left_ctxt: (optional; default None) left ctxt that triggers rule application: / C __
            - a list of items, each being a set or NatClass object
            - distance from :target: interpreted right to left: / [C_0, C_1, ...] __
        :right_ctxt: (optional; default None) right ctxt that triggers rule application: / __ D
            - a list of items, each being a set or NatClass object
            - distance from :target: interpreted left to right: / __ [D_0, D_1, ...]

        :return: None
        '''
        # init variables
        self.seginv = seginv
        self.target = target
        self.left_ctxt = left_ctxt
        self.right_ctxt = right_ctxt

        # TODO

    def _predictions(self, segstr: SegStr) -> list:
        '''
        :segstr: a SegStr to apply the rule to

        :return: a list of the predictions that the rule makes over :segstr:
            - Each item in the list is a tuple (index, new_seg) specifying each new_seg value predicted and at what index
        '''
        # TODO
        # Fail loudly until implemented: returning None here would make the inherited
        # produce()/tsp_stats() crash with an unrelated "NoneType is not iterable" error.
        raise NotImplementedError(f'Method _predictions not implemented for type {type(self).__name__}')

    def __str__(self) -> str:
        '''
        :return: a str representation of the rule (not yet implemented)
        '''
        # TODO
        # Fail loudly until implemented: returning None from __str__ makes str()/repr()
        # raise a TypeError. The message must not format {self}, which would call __str__ again.
        raise NotImplementedError(f'Method __str__ not implemented for type {type(self).__name__}')
1 change: 1 addition & 0 deletions algophon/models/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
from algophon.models.rule import Rule
73 changes: 73 additions & 0 deletions algophon/models/rule.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
from typing import Union, Iterable
from abc import abstractmethod

from algophon import SegStr

class Rule:
    '''
    Base class for rules that map a UR to a predicted SR.

    Subclasses must:
        - set a :seginv: attribute (read by produce() and tsp_stats() when building SegStr objects)
        - implement _predictions(), which returns the (index, new_seg) changes the rule makes
        - implement __str__()

    produce(), __call__(), accuracy(), and tsp_stats() are implemented here on top of _predictions().
    '''

    def __str__(self) -> str:
        # The message must not format {self}: that would call __str__ again and
        # end in a RecursionError instead of the intended NotImplementedError.
        raise NotImplementedError(f'Method __str__ not implemented for object of type {type(self)}')

    def __repr__(self) -> str:
        return self.__str__()

    def produce(self, ur: Union[str, "SegStr"]) -> "SegStr":
        '''
        Produces a SR for an input UR

        :ur: the UR to produce an SR for. Can be:
            - space-separated str of IPA symbols
            - SegStr object

        :return: a SegStr representing the predicted SR
        '''
        if isinstance(ur, str): # convert str ur to SegStr
            ur = SegStr(ur, seginv=self.seginv)
        new_segs = list(ur._segs) # init new seg list (a copy, so :ur: itself is not modified)
        # apply predictions
        for idx, seg in self._predictions(segstr=ur):
            new_segs[idx] = seg
        return SegStr(segs=new_segs, seginv=self.seginv) # return SegStr object

    # calling a Rule object amounts to calling its produce() method
    __call__ = produce

    def accuracy(self, pairs: Iterable) -> float:
        '''
        :pairs: an iterable of (UR, SR) pairs to compute accuracy for
            - Computed over unique pairs

        :return: the accuracy of the rule's predictions of the :pairs:
            - 0.0 if the rule makes no predictions over :pairs:
        '''
        n, m = self.tsp_stats(pairs=pairs)
        return m / n if n > 0 else 0.0

    def tsp_stats(self, pairs: Iterable) -> tuple[int, int]:
        '''
        Computes n and m for the TSP w.r.t a set of pairs

        :pairs: an iterable of (UR, SR) pairs to compute the TSP stats for
            - Computed over unique pairs
            - Each UR and SR can be a str (converted to SegStr) or a SegStr object

        :return: n and m
            - n is the number of predictions the rule makes over the URs
            - m is the number of those predictions that match the SR at the same index
        '''
        n, m = 0, 0
        for ur, sr in set(pairs): # set() removes duplicate pairs
            if isinstance(ur, str):
                ur = SegStr(ur, seginv=self.seginv)
            if isinstance(sr, str):
                sr = SegStr(sr, seginv=self.seginv)
            for idx, pred_sr_seg in self._predictions(ur):
                n += 1
                if sr[idx] == pred_sr_seg:
                    m += 1
        return n, m

    # NOTE: Rule does not use ABCMeta, so @abstractmethod only marks intent;
    # the explicit raise below is what signals a missing override.
    @abstractmethod
    def _predictions(self, segstr: "SegStr") -> list:
        '''
        :segstr: a SegStr to apply the rule to

        :return: a list of the predictions that the rule makes over :segstr:
            - Each item in the list is a tuple (index, new_seg) specifying each new_seg value predicted and at what index
        '''
        # Do not format {self} here: it calls __str__, which a subclass may not have implemented.
        raise NotImplementedError(f'Method _predictions not implemented for object of type {type(self)}')
Loading