| 1 | #!/usr/bin/python
 | 
  
    | 2 | # -*- coding: utf-8 -*-
 | 
  
    | 3 | 
 | 
  
    | 4 | # --------------------------------------------------------------- #
 | 
  
    | 5 | #    Library for a simple processing of ASF files.                #
 | 
  
    | 6 | # --------------------------------------------------------------- #
 | 
  
    | 7 | #    2011 - 2016  Zdenek Hanzlicek (zhanzlic@ntis.zcu.cz)         #
 | 
  
    | 8 | #                 NTIS, University of West Bohemia                #
 | 
  
    | 9 | # --------------------------------------------------------------- #
 | 
  
    | 10 | 
 | 
  
    | 11 | # SVN $Id: asflight.py 1756 2016-04-15 13:39:57Z zhanzlic $
 | 
  
    | 12 | 
 | 
  
    | 13 | 
 | 
  
    | 14 | import codecs
 | 
  
    | 15 | import os
 | 
  
    | 16 | import os.path
 | 
  
    | 17 | 
 | 
  
    | 18 | 
 | 
  
    | 19 | class AsfLight:
 | 
  
    | 20 | 
 | 
  
    | 21 |     def __init__( self, file_name=None, code_page='utf-8' ):
 | 
  
    | 22 | 
 | 
  
    | 23 |         self.utts = dict()  # particular utterances
 | 
  
    | 24 |         self.attrib_order = list()  # order of attributes for all utterances
 | 
  
    | 25 |         self.header = list()  # list of lines with comments
 | 
  
    | 26 | 
 | 
  
    | 27 |         if file_name is not None:
 | 
  
    | 28 |             self.read( file_name )
 | 
  
    | 29 | 
 | 
  
    | 30 | 
 | 
  
    | 31 |     ## ==========----------
 | 
  
    | 32 | 
 | 
  
    | 33 | 
 | 
  
    | 34 |     def __len__( self ):
 | 
  
    | 35 |         return len( self.utts )
 | 
  
    | 36 | 
 | 
  
    | 37 | 
 | 
  
    | 38 |     ## ==========----------
 | 
  
    | 39 | 
 | 
  
    | 40 | 
 | 
  
    | 41 |     def __iter__( self ):
 | 
  
    | 42 |         return iter( self.utts )
 | 
  
    | 43 | 
 | 
  
    | 44 | 
 | 
  
    | 45 |     ## ==========----------
 | 
  
    | 46 | 
 | 
  
    | 47 | 
 | 
  
    | 48 |     def __setitem__( self, key, value ):
 | 
  
    | 49 |         self.utts[ key ] = value
 | 
  
    | 50 | 
 | 
  
    | 51 | 
 | 
  
    | 52 |     ## ==========----------
 | 
  
    | 53 | 
 | 
  
    | 54 | 
 | 
  
    | 55 |     def __getitem__( self, key ):
 | 
  
    | 56 |         return self.utts[ key ]
 | 
  
    | 57 | 
 | 
  
    | 58 | 
 | 
  
    | 59 |     ## ==========----------
 | 
  
    | 60 | 
 | 
  
    | 61 | 
 | 
  
    | 62 |     def __contains__( self, item ):
 | 
  
    | 63 |         return item in self.utts
 | 
  
    | 64 | 
 | 
  
    | 65 | 
 | 
  
    | 66 |     ## ==========----------
 | 
  
    | 67 | 
 | 
  
    | 68 | 
 | 
  
    | 69 |     def read( self, file_name, code_page='utf-8' ):
 | 
  
    | 70 | 
 | 
  
    | 71 |         asf_handle = codecs.open( file_name, 'rt', code_page )
 | 
  
    | 72 |         asf_content = asf_handle.readlines()
 | 
  
    | 73 |         asf_handle.close()
 | 
  
    | 74 | 
 | 
  
    | 75 |         for asf_line in asf_content:
 | 
  
    | 76 |             asf_line = asf_line.strip()
 | 
  
    | 77 | 
 | 
  
    | 78 |             if asf_line == "":  # empty line
 | 
  
    | 79 |                 continue
 | 
  
    | 80 | 
 | 
  
    | 81 |             if asf_line.startswith("#"):  # header / comment
 | 
  
    | 82 |                 self.header.append( asf_line )
 | 
  
    | 83 | 
 | 
  
    | 84 |             elif asf_line.startswith('"') and asf_line.endswith('"'):  # new utterance
 | 
  
    | 85 |                 utt_name = asf_line[1:-1]
 | 
  
    | 86 |                 utt_units = list()
 | 
  
    | 87 |                 self.utts[ utt_name ] = utt_units
 | 
  
    | 88 | 
 | 
  
    | 89 |             elif asf_line.startswith("|") and asf_line.endswith('|'):  # unit
 | 
  
    | 90 |                 attrib_vals = [ attrib_val.strip() for attrib_val in asf_line[1:-1].split("|") ]
 | 
  
    | 91 |                 utt_units.append( { self.attrib_order[ idx ]:attrib_vals[ idx ] for idx in range( len( self.attrib_order ) ) } )
 | 
  
    | 92 | 
 | 
  
    | 93 |             elif asf_line.startswith("[") and asf_line.endswith("]"):  # list of attribute names
 | 
  
    | 94 |                 self.attrib_order = [ attrib_name.strip() for attrib_name in asf_line[1:-1].split("|") ]
 | 
  
    | 95 | 
 | 
  
    | 96 | 
 | 
  
    | 97 |     ## ==========----------
 | 
  
    | 98 | 
 | 
  
    | 99 | 
 | 
  
    | 100 |     def write( self, file_name, code_page='utf-8' ):
 | 
  
    | 101 | 
 | 
  
    | 102 |         asf_handle = codecs.open( file_name, 'wt', code_page )
 | 
  
    | 103 | 
 | 
  
    | 104 |         if len( self.header ):
 | 
  
    | 105 |             asf_handle.write( "\n".join( self.header ) )
 | 
  
    | 106 |             asf_handle.write( "\n\n" )
 | 
  
    | 107 | 
 | 
  
    | 108 |         attrib_lens = { attrib_name:len( attrib_name ) for attrib_name in self.attrib_order }
 | 
  
    | 109 | 
 | 
  
    | 110 |         # get the maximum lenghts for particular attributes
 | 
  
    | 111 |         for units in self.utts.itervalues():
 | 
  
    | 112 |             for unit in units:
 | 
  
    | 113 |                 for attrib_name in unit:
 | 
  
    | 114 | 
 | 
  
    | 115 |                     attrib_val = unit[ attrib_name ]
 | 
  
    | 116 |                     if not isinstance( attrib_val, unicode ):
 | 
  
    | 117 |                         attrib_len = len( unicode( attrib_val ) )
 | 
  
    | 118 |                     else:
 | 
  
    | 119 |                         attrib_len = len( attrib_val )
 | 
  
    | 120 | 
 | 
  
    | 121 |                     if attrib_lens[ attrib_name ] < attrib_len:
 | 
  
    | 122 |                         attrib_lens[ attrib_name ] = attrib_len
 | 
  
    | 123 | 
 | 
  
    | 124 |         # write list of attribute names
 | 
  
    | 125 |         asf_handle.write( "[ " + " | ".join( [ attrib_name + " "*( attrib_lens[ attrib_name ] - len( attrib_name ) ) for attrib_name in self.attrib_order ] ) + " ]\n\n" )
 | 
  
    | 126 | 
 | 
  
    | 127 |         for utt_name in sorted( self.utts.iterkeys() ):
 | 
  
    | 128 |             asf_handle.write( '"' + utt_name + '"\n' )
 | 
  
    | 129 | 
 | 
  
    | 130 |             for unit in self.utts[ utt_name ]:
 | 
  
    | 131 |                 attrib_vals = { attrib_name:( unit[ attrib_name ] if isinstance( unit[ attrib_name ], unicode ) else unicode( unit[ attrib_name ] ) ) for attrib_name in self.attrib_order }
 | 
  
    | 132 |                 asf_handle.write( "| " + " | ".join( [ attrib_vals[ attrib_name ] + " "*( attrib_lens[ attrib_name ] - len( attrib_vals[ attrib_name ] ) ) for attrib_name in self.attrib_order ] ) + " |\n" )
 | 
  
    | 133 | 
 | 
  
    | 134 |             asf_handle.write( "\n" )
 | 
  
    | 135 | 
 | 
  
    | 136 |         asf_handle.close()
 | 
  
    | 137 | 
 |