SPPAS 4.20

Module sppas.src.annotations

Class sppasLexMetric

Description

SPPAS integration of the occ and rank estimator.

Constructor

Create a new sppasLexMetric instance.

Parameters

log: (sppasLog) Human-readable logs.

View Source

def __init__(self, log=None):
    """Create a new sppasLexMetric instance.

    The parent initializer receives the name ``lexmetric.json`` together
    with the log, then the segment separators are set to their defaults.

    :param log: (sppasLog) Human-readable logs.

    """
    config_name = 'lexmetric.json'
    super(sppasLexMetric, self).__init__(config_name, log)

    # Default separators; set_segments_separators() replaces this list.
    default_separators = ['#', '+', 'dummy']
    self._separators = default_separators

Public functions

fix_options

Fix all options.

Parameters

options: list of sppasOption instances

View Source

def fix_options(self, options):
    """Apply each given option to this annotation.

    The keys 'alt', 'tiername' and 'separators' are forwarded to their
    dedicated setter. Any other key containing 'pattern' is stored as-is
    in the options. Every other key is rejected.

    :param options: list of sppasOption instances
    :raises: AnnotationOptionError if a key is not a known option

    """
    for option in options:
        name = option.get_key()

        if name == 'alt':
            self.set_alt(option.get_value())
            continue

        if name == 'tiername':
            self.set_tiername(option.get_value())
            continue

        if name == 'separators':
            self.set_segments_separators(option.get_value())
            continue

        # Anything left must be a pattern option, e.g. an output pattern.
        if 'pattern' not in name:
            raise AnnotationOptionError(name)
        self._options[name] = option.get_value()

set_alt

Fix the alt option, used to estimate occ and rank.

Parameters

alt: (bool)

View Source

def set_alt(self, alt):
    """Fix the alt option, used to estimate occ and rank.

    The given value is converted to a boolean before being stored.

    :param alt: (bool)

    """
    enabled = bool(alt)
    self._options['alt'] = enabled

set_tiername

Fix the tiername option.

Parameters

tier_name: (str)

View Source

def set_tiername(self, tier_name):
    """Fix the tiername option.

    The name is stored as returned by sppasUnicode.to_strip().

    :param tier_name: (str)

    """
    cleaned_name = sppasUnicode(tier_name).to_strip()
    self._options['tiername'] = cleaned_name

set_segments_separators

Fix the separators to create segments.

Parameters

entry: (str) Entries separated by whitespace.

View Source

def set_segments_separators(self, entry):
    """Fix the separators to create segments.

    The entry is passed through sppasUnicode.to_strip() and then split
    on whitespace. An entry that is empty after stripping results in an
    empty list of separators.

    :param entry: (str) Entries separated by whitespace.

    """
    cleaned = sppasUnicode(entry).to_strip()

    # Nothing left after stripping: no separator at all.
    if len(cleaned) == 0:
        self._separators = []
        return

    self._separators = cleaned.split()

tier_to_segment_occ

Create segment intervals and eval the number of occurrences.

Parameters

input_tier: (sppasTier)

Returns

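(tuple of sppasTier) Two tiers: 'LM-OccAnnInSegments' with the number of annotations in each segment, and 'LM-OccLabInSegments' with the number of labels in each segment.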

View Source

def tier_to_segment_occ(self, input_tier):
    """Create segment intervals and eval the number of occurrences.

    Segments are the intervals exported from the input tier with the
    current separators, or a copy of the input tier if there is no
    separator. For each segment, the annotations the input tier finds
    between its lowest and highest localizations are counted, as well as
    the total number of labels of these annotations.

    :param input_tier: (sppasTier)
    :returns: (tuple) Two sppasTier: 'LM-OccAnnInSegments' with the
        number of annotations of each segment, and 'LM-OccLabInSegments'
        with the number of labels of each segment.

    """
    if len(self._separators) == 0:
        ann_counts = input_tier.copy()
        ann_counts.gen_id()
    else:
        ann_counts = input_tier.export_to_intervals(self._separators)
    ann_counts.set_name('LM-OccAnnInSegments')

    # The second tier has the same segments as the first one.
    lab_counts = ann_counts.copy()
    lab_counts.gen_id()
    lab_counts.set_name('LM-OccLabInSegments')

    for ann_segment, lab_segment in zip(ann_counts, lab_counts):
        begin = ann_segment.get_lowest_localization()
        end = ann_segment.get_highest_localization()
        covered = input_tier.find(begin, end)

        ann_total = sppasTag(str(len(covered)), 'int')
        ann_segment.set_labels([sppasLabel(ann_total)])

        # Labels are counted only after ann_segment was re-labelled: keep
        # this order, in case a copied tier shares its annotations with
        # the input tier -- TODO confirm.
        lab_total = sum(len(ann.get_labels()) for ann in covered)
        lab_segment.set_labels([sppasLabel(sppasTag(str(lab_total), 'int'))])

    return (ann_counts, lab_counts)

get_input_tier

Return the input tier from the inputs.

Parameters

input_files: (list)

View Source

def get_input_tier(self, input_files):
    """Return the input tier from the inputs.

    Files are read one after the other. The first tier matching the
    'tiername' option, regardless of the case, is returned.

    :param input_files: (list)
    :returns: the tier found in one of the input files
    :raises: NoTierInputError if none of the files has such a tier

    """
    for path in input_files:
        transcription = sppasTrsRW(path).read()
        candidate = transcription.find(self._options['tiername'], case_sensitive=False)
        if candidate is None:
            # Not in this file: try the next one.
            continue
        return candidate

    message = "Tier with name '{:s}' not found in input file.".format(self._options['tiername'])
    logging.error(message)
    raise NoTierInputError

run

Run the automatic annotation process on an input.

Parameters

input_files: (list of str) Time-aligned tokens, or other

output: (str) the output file name

Returns

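(sppasTranscription) if no output file name is given; otherwise (list) containing the name of the output file that was written.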

View Source

def run(self, input_files, output=None):
    """Run the automatic annotation process on an input.

    The result holds four tiers, in this order: occ, rank, the number of
    annotations in segments and the number of labels in segments.

    :param input_files: (list of str) Time-aligned tokens, or other
    :param output: (str) the output file name
    :returns: (sppasTranscription) if no output is given; otherwise a
        list holding the name of the file which was written.
    :raises: EmptyOutputError if an output is given but the result is empty

    """
    source_tier = self.get_input_tier(input_files)

    # Occurrences and ranks of the input tier.
    estimator = OccRank(source_tier)
    occ_tier = estimator.occ()
    rank_tier = estimator.rank()

    # Occurrences of annotations and of labels inside the segments.
    ann_count_tier, lab_count_tier = self.tier_to_segment_occ(source_tier)

    result = sppasTranscription(self.name)
    result.set_meta('token_lexmetric_result_of', input_files[0])
    for produced in (occ_tier, rank_tier, ann_count_tier, lab_count_tier):
        result.append(produced)

    if output is None:
        return result

    if len(result) == 0:
        raise EmptyOutputError

    target = self.fix_out_file_ext(output)
    sppasTrsRW(target).write(result)
    return [target]

get_output_pattern

Pattern this annotation uses in an output filename.

View Source

def get_output_pattern(self):
    """Pattern this annotation uses in an output filename.

    :returns: the value of the 'outputpattern' option, or '-lexm' if
        this option is not defined.

    """
    fallback = '-lexm'
    return self._options.get('outputpattern', fallback)