@ -17,118 +17,123 @@ import shutil
reload ( sys )
reload ( sys )
sys . setdefaultencoding ( " utf-8 " )
sys . setdefaultencoding ( " utf-8 " )
import arg parse
import opt parse
import os
import os
import json
import json
import re
import re
endian = sys . byteorder
endian = sys . byteorder
parser = argparse . ArgumentParser ( description = " ICU Datafile repackager. Example of use: \" mkdir tmp ; python icutrim.py -D ~/Downloads/icudt53l.dat -T tmp -F trim_en.json -O icudt53l.dat \" you will then find a smaller icudt53l.dat in ' tmp ' . " ,
parser = optparse . OptionParser ( usage = " usage: mkdir tmp ; % prog -D ~/Downloads/icudt53l.dat -T tmp -F trim_en.json -O icudt53l.dat " )
epilog = " ICU tool, http://icu-project.org - master copy at http://source.icu-project.org/repos/icu/tools/trunk/scripts/icutrim.py " )
parser . add_argument ( " -P " , " --tool-path " ,
parser . add_option ( " -P " , " --tool-path " ,
action = " store " ,
action = " store " ,
dest = " toolpath " ,
dest = " toolpath " ,
help = " set the prefix directory for ICU tools " )
help = " set the prefix directory for ICU tools " )
parser . add_argument ( " -D " , " --input-file " ,
parser . add_option ( " -D " , " --input-file " ,
action = " store " ,
action = " store " ,
dest = " datfile " ,
dest = " datfile " ,
help = " input data file (icudt__.dat) " ,
help = " input data file (icudt__.dat) " ,
required = True )
) # required
parser . add_argument ( " -F " , " --filter-file " ,
parser . add_option ( " -F " , " --filter-file " ,
action = " store " ,
action = " store " ,
dest = " filterfile " ,
dest = " filterfile " ,
help = " filter file (JSON format) " ,
help = " filter file (JSON format) " ,
required = True )
) # required
parser . add_argument ( " -T " , " --tmp-dir " ,
parser . add_option ( " -T " , " --tmp-dir " ,
action = " store " ,
action = " store " ,
dest = " tmpdir " ,
dest = " tmpdir " ,
help = " working directory. " ,
help = " working directory. " ,
required = True )
) # required
parser . add_argument ( " --delete-tmp " ,
parser . add_option ( " --delete-tmp " ,
action = " count " ,
action = " count " ,
dest = " deltmpdir " ,
dest = " deltmpdir " ,
help = " delete working directory. " ,
help = " delete working directory. " ,
default = 0 )
default = 0 )
parser . add_argument ( " -O " , " --outfile " ,
parser . add_option ( " -O " , " --outfile " ,
action = " store " ,
action = " store " ,
dest = " outfile " ,
dest = " outfile " ,
help = " outfile (NOT a full path) " ,
help = " outfile (NOT a full path) " ,
required = True )
) # required
parser . add_argument ( " -v " , " --verbose " ,
parser . add_option ( " -v " , " --verbose " ,
action = " count " ,
action = " count " ,
default = 0 )
default = 0 )
parser . add_argument ( ' -e ' , ' --endian ' , action = ' store ' , dest = ' endian ' , help = ' endian, big, little or host, your default is " %s " . ' % endian , default = endian , metavar = ' endianness ' )
parser . add_option ( ' -e ' , ' --endian ' , action = ' store ' , dest = ' endian ' , help = ' endian, big, little or host, your default is " %s " . ' % endian , default = endian , metavar = ' endianness ' )
( options , args ) = parser . parse_args ( )
args = parser . parse_args ( )
optVars = vars ( options )
if args . verbose > 0 :
for opt in [ " datfile " , " filterfile " , " tmpdir " , " outfile " ] :
print " Options: " + str ( args )
if optVars [ opt ] is None :
print " Missing required option: %s " % opt
sys . exit ( 1 )
if options . verbose > 0 :
print " Options: " + str ( options )
if ( os . path . isdir ( args . tmpdir ) and args . deltmpdir ) :
if ( os . path . isdir ( option s. tmpdir ) and option s. deltmpdir ) :
if args . verbose > 1 :
if option s. verbose > 1 :
print " Deleting tmp dir %s .. " % ( args . tmpdir )
print " Deleting tmp dir %s .. " % ( option s. tmpdir )
shutil . rmtree ( args . tmpdir )
shutil . rmtree ( option s. tmpdir )
if not ( os . path . isdir ( arg s. tmpdir ) ) :
if not ( os . path . isdir ( option s. tmpdir ) ) :
os . mkdir ( arg s. tmpdir )
os . mkdir ( option s. tmpdir )
else :
else :
print " Please delete tmpdir %s before beginning. " % arg s. tmpdir
print " Please delete tmpdir %s before beginning. " % option s. tmpdir
sys . exit ( 1 )
sys . exit ( 1 )
if arg s. endian not in ( " big " , " little " , " host " ) :
if option s. endian not in ( " big " , " little " , " host " ) :
print " Unknown endianness: %s " % arg s. endian
print " Unknown endianness: %s " % option s. endian
sys . exit ( 1 )
sys . exit ( 1 )
if arg s. endian is " host " :
if option s. endian is " host " :
arg s. endian = endian
option s. endian = endian
if not os . path . isdir ( arg s. tmpdir ) :
if not os . path . isdir ( option s. tmpdir ) :
print " Error, tmpdir not a directory: %s " % ( arg s. tmpdir )
print " Error, tmpdir not a directory: %s " % ( option s. tmpdir )
sys . exit ( 1 )
sys . exit ( 1 )
if not os . path . isfile ( arg s. filterfile ) :
if not os . path . isfile ( option s. filterfile ) :
print " Filterfile doesn ' t exist: %s " % ( arg s. filterfile )
print " Filterfile doesn ' t exist: %s " % ( option s. filterfile )
sys . exit ( 1 )
sys . exit ( 1 )
if not os . path . isfile ( arg s. datfile ) :
if not os . path . isfile ( option s. datfile ) :
print " Datfile doesn ' t exist: %s " % ( arg s. datfile )
print " Datfile doesn ' t exist: %s " % ( option s. datfile )
sys . exit ( 1 )
sys . exit ( 1 )
if not arg s. datfile . endswith ( " .dat " ) :
if not option s. datfile . endswith ( " .dat " ) :
print " Datfile doesn ' t end with .dat: %s " % ( arg s. datfile )
print " Datfile doesn ' t end with .dat: %s " % ( option s. datfile )
sys . exit ( 1 )
sys . exit ( 1 )
outfile = os . path . join ( arg s. tmpdir , arg s. outfile )
outfile = os . path . join ( option s. tmpdir , option s. outfile )
if os . path . isfile ( outfile ) :
if os . path . isfile ( outfile ) :
print " Error, output file does exist: %s " % ( outfile )
print " Error, output file does exist: %s " % ( outfile )
sys . exit ( 1 )
sys . exit ( 1 )
if not arg s. outfile . endswith ( " .dat " ) :
if not option s. outfile . endswith ( " .dat " ) :
print " Outfile doesn ' t end with .dat: %s " % ( arg s. outfile )
print " Outfile doesn ' t end with .dat: %s " % ( option s. outfile )
sys . exit ( 1 )
sys . exit ( 1 )
dataname = arg s. outfile [ 0 : - 4 ]
dataname = option s. outfile [ 0 : - 4 ]
## TODO: need to improve this. Quotes, etc.
## TODO: need to improve this. Quotes, etc.
def runcmd ( tool , cmd , doContinue = False ) :
def runcmd ( tool , cmd , doContinue = False ) :
if ( arg s. toolpath ) :
if ( option s. toolpath ) :
cmd = os . path . join ( arg s. toolpath , tool ) + " " + cmd
cmd = os . path . join ( option s. toolpath , tool ) + " " + cmd
else :
else :
cmd = tool + " " + cmd
cmd = tool + " " + cmd
if ( arg s. verbose > 4 ) :
if ( option s. verbose > 4 ) :
print " # " + cmd
print " # " + cmd
rc = os . system ( cmd )
rc = os . system ( cmd )
@ -138,24 +143,24 @@ def runcmd(tool, cmd, doContinue=False):
return rc
return rc
## STEP 0 - read in json config
## STEP 0 - read in json config
fi = open ( arg s. filterfile , " rb " )
fi = open ( option s. filterfile , " rb " )
config = json . load ( fi )
config = json . load ( fi )
fi . close ( )
fi . close ( )
if ( arg s. verbose > 6 ) :
if ( option s. verbose > 6 ) :
print config
print config
if ( config . has_key ( " comment " ) ) :
if ( config . has_key ( " comment " ) ) :
print " %s : %s " % ( arg s. filterfile , config [ " comment " ] )
print " %s : %s " % ( option s. filterfile , config [ " comment " ] )
## STEP 1 - copy the data file, swapping endianness
## STEP 1 - copy the data file, swapping endianness
endian_letter = " l "
endian_letter = " l "
runcmd ( " icupkg " , " -t %s %s %s " " " % ( endian_letter , arg s. datfile , outfile ) )
runcmd ( " icupkg " , " -t %s %s %s " " " % ( endian_letter , option s. datfile , outfile ) )
## STEP 2 - get listing
## STEP 2 - get listing
listfile = os . path . join ( arg s. tmpdir , " icudata.lst " )
listfile = os . path . join ( option s. tmpdir , " icudata.lst " )
runcmd ( " icupkg " , " -l %s > %s " " " % ( outfile , listfile ) )
runcmd ( " icupkg " , " -l %s > %s " " " % ( outfile , listfile ) )
fi = open ( listfile , ' rb ' )
fi = open ( listfile , ' rb ' )
@ -165,7 +170,7 @@ fi.close()
itemset = set ( items )
itemset = set ( items )
if ( arg s. verbose > 1 ) :
if ( option s. verbose > 1 ) :
print " input file: %d items " % ( len ( items ) )
print " input file: %d items " % ( len ( items ) )
# list of all trees
# list of all trees
@ -192,23 +197,23 @@ def queueForRemoval(tree):
if not config [ " trees " ] . has_key ( tree ) :
if not config [ " trees " ] . has_key ( tree ) :
return
return
mytree = trees [ tree ]
mytree = trees [ tree ]
if ( arg s. verbose > 0 ) :
if ( option s. verbose > 0 ) :
print " * %s : %d items " % ( tree , len ( mytree [ " locs " ] ) )
print " * %s : %d items " % ( tree , len ( mytree [ " locs " ] ) )
# do varible substitution for this tree here
# do varible substitution for this tree here
if type ( config [ " trees " ] [ tree ] ) == str or type ( config [ " trees " ] [ tree ] ) == unicode :
if type ( config [ " trees " ] [ tree ] ) == str or type ( config [ " trees " ] [ tree ] ) == unicode :
treeStr = config [ " trees " ] [ tree ]
treeStr = config [ " trees " ] [ tree ]
if ( arg s. verbose > 5 ) :
if ( option s. verbose > 5 ) :
print " Substituting $ %s for tree %s " % ( treeStr , tree )
print " Substituting $ %s for tree %s " % ( treeStr , tree )
if ( not config . has_key ( " variables " ) or not config [ " variables " ] . has_key ( treeStr ) ) :
if ( not config . has_key ( " variables " ) or not config [ " variables " ] . has_key ( treeStr ) ) :
print " ERROR: no variable: variables. %s for tree %s " % ( treeStr , tree )
print " ERROR: no variable: variables. %s for tree %s " % ( treeStr , tree )
sys . exit ( 1 )
sys . exit ( 1 )
config [ " trees " ] [ tree ] = config [ " variables " ] [ treeStr ]
config [ " trees " ] [ tree ] = config [ " variables " ] [ treeStr ]
myconfig = config [ " trees " ] [ tree ]
myconfig = config [ " trees " ] [ tree ]
if ( arg s. verbose > 4 ) :
if ( option s. verbose > 4 ) :
print " Config: %s " % ( myconfig )
print " Config: %s " % ( myconfig )
# Process this tree
# Process this tree
if ( len ( myconfig ) == 0 or len ( mytree [ " locs " ] ) == 0 ) :
if ( len ( myconfig ) == 0 or len ( mytree [ " locs " ] ) == 0 ) :
if ( arg s. verbose > 2 ) :
if ( option s. verbose > 2 ) :
print " No processing for %s - skipping " % ( tree )
print " No processing for %s - skipping " % ( tree )
else :
else :
only = None
only = None
@ -217,7 +222,7 @@ def queueForRemoval(tree):
if ( len ( only ) == 0 ) and ( mytree [ " treeprefix " ] != " " ) :
if ( len ( only ) == 0 ) and ( mytree [ " treeprefix " ] != " " ) :
thePool = " %s pool.res " % ( mytree [ " treeprefix " ] )
thePool = " %s pool.res " % ( mytree [ " treeprefix " ] )
if ( thePool in itemset ) :
if ( thePool in itemset ) :
if ( arg s. verbose > 0 ) :
if ( option s. verbose > 0 ) :
print " Removing %s because tree %s is empty. " % ( thePool , tree )
print " Removing %s because tree %s is empty. " % ( thePool , tree )
remove . add ( thePool )
remove . add ( thePool )
else :
else :
@ -227,12 +232,12 @@ def queueForRemoval(tree):
if ( only is not None ) and not loc in only :
if ( only is not None ) and not loc in only :
# REMOVE loc
# REMOVE loc
toRemove = " %s %s %s " % ( mytree [ " treeprefix " ] , loc , mytree [ " extension " ] )
toRemove = " %s %s %s " % ( mytree [ " treeprefix " ] , loc , mytree [ " extension " ] )
if ( arg s. verbose > 6 ) :
if ( option s. verbose > 6 ) :
print " Queueing for removal: %s " % toRemove
print " Queueing for removal: %s " % toRemove
remove . add ( toRemove )
remove . add ( toRemove )
def addTreeByType ( tree , mytree ) :
def addTreeByType ( tree , mytree ) :
if ( arg s. verbose > 1 ) :
if ( option s. verbose > 1 ) :
print " (considering %s ): %s " % ( tree , mytree )
print " (considering %s ): %s " % ( tree , mytree )
trees [ tree ] = mytree
trees [ tree ] = mytree
mytree [ " locs " ] = [ ]
mytree [ " locs " ] = [ ]
@ -259,18 +264,18 @@ for i in range(len(items)):
tree = " ROOT "
tree = " ROOT "
else :
else :
tree = treeprefix [ 0 : - 1 ]
tree = treeprefix [ 0 : - 1 ]
if ( arg s. verbose > 6 ) :
if ( option s. verbose > 6 ) :
print " procesing %s " % ( tree )
print " procesing %s " % ( tree )
trees [ tree ] = { " extension " : " .res " , " treeprefix " : treeprefix , " hasIndex " : True }
trees [ tree ] = { " extension " : " .res " , " treeprefix " : treeprefix , " hasIndex " : True }
# read in the resource list for the tree
# read in the resource list for the tree
treelistfile = os . path . join ( arg s. tmpdir , " %s .lst " % tree )
treelistfile = os . path . join ( option s. tmpdir , " %s .lst " % tree )
runcmd ( " iculslocs " , " -i %s -N %s -T %s -l > %s " % ( outfile , dataname , tree , treelistfile ) )
runcmd ( " iculslocs " , " -i %s -N %s -T %s -l > %s " % ( outfile , dataname , tree , treelistfile ) )
fi = open ( treelistfile , ' rb ' )
fi = open ( treelistfile , ' rb ' )
treeitems = fi . readlines ( )
treeitems = fi . readlines ( )
trees [ tree ] [ " locs " ] = [ treeitems [ i ] . strip ( ) for i in range ( len ( treeitems ) ) ]
trees [ tree ] [ " locs " ] = [ treeitems [ i ] . strip ( ) for i in range ( len ( treeitems ) ) ]
fi . close ( )
fi . close ( )
if ( not config . has_key ( " trees " ) or not config [ " trees " ] . has_key ( tree ) ) :
if ( not config . has_key ( " trees " ) or not config [ " trees " ] . has_key ( tree ) ) :
print " Warning: filter file %s does not mention trees. %s - will be kept as-is " % ( arg s. filterfile , tree )
print " Warning: filter file %s does not mention trees. %s - will be kept as-is " % ( option s. filterfile , tree )
else :
else :
queueForRemoval ( tree )
queueForRemoval ( tree )
@ -281,19 +286,19 @@ def removeList(count=0):
if ( count > 10 ) :
if ( count > 10 ) :
print " Giving up - %d th attempt at removal. " % count
print " Giving up - %d th attempt at removal. " % count
sys . exit ( 1 )
sys . exit ( 1 )
if ( arg s. verbose > 1 ) :
if ( option s. verbose > 1 ) :
print " %d items to remove - try # %d " % ( len ( remove ) , count )
print " %d items to remove - try # %d " % ( len ( remove ) , count )
if ( len ( remove ) > 0 ) :
if ( len ( remove ) > 0 ) :
oldcount = len ( remove )
oldcount = len ( remove )
hackerrfile = os . path . join ( arg s. tmpdir , " REMOVE.err " )
hackerrfile = os . path . join ( option s. tmpdir , " REMOVE.err " )
removefile = os . path . join ( arg s. tmpdir , " REMOVE.lst " )
removefile = os . path . join ( option s. tmpdir , " REMOVE.lst " )
fi = open ( removefile , ' wb ' )
fi = open ( removefile , ' wb ' )
for i in remove :
for i in remove :
print >> fi , i
print >> fi , i
fi . close ( )
fi . close ( )
rc = runcmd ( " icupkg " , " -r %s %s 2> %s " % ( removefile , outfile , hackerrfile ) , True )
rc = runcmd ( " icupkg " , " -r %s %s 2> %s " % ( removefile , outfile , hackerrfile ) , True )
if rc is not 0 :
if rc is not 0 :
if ( arg s. verbose > 5 ) :
if ( option s. verbose > 5 ) :
print " ## Damage control, trying to parse stderr from icupkg.. "
print " ## Damage control, trying to parse stderr from icupkg.. "
fi = open ( hackerrfile , ' rb ' )
fi = open ( hackerrfile , ' rb ' )
erritems = fi . readlines ( )
erritems = fi . readlines ( )
@ -305,13 +310,13 @@ def removeList(count=0):
m = pat . match ( line )
m = pat . match ( line )
if m :
if m :
toDelete = m . group ( 1 )
toDelete = m . group ( 1 )
if ( arg s. verbose > 5 ) :
if ( option s. verbose > 5 ) :
print " << %s added to delete " % toDelete
print " << %s added to delete " % toDelete
remove . add ( toDelete )
remove . add ( toDelete )
else :
else :
print " ERROR: could not match errline: %s " % line
print " ERROR: could not match errline: %s " % line
sys . exit ( 1 )
sys . exit ( 1 )
if ( arg s. verbose > 5 ) :
if ( option s. verbose > 5 ) :
print " now %d items to remove " % len ( remove )
print " now %d items to remove " % len ( remove )
if ( oldcount == len ( remove ) ) :
if ( oldcount == len ( remove ) ) :
print " ERROR: could not add any mor eitems to remove. Fail. "
print " ERROR: could not add any mor eitems to remove. Fail. "
@ -326,7 +331,7 @@ for tree in trees:
# skip trees that don't have res_index
# skip trees that don't have res_index
if not trees [ tree ] . has_key ( " hasIndex " ) :
if not trees [ tree ] . has_key ( " hasIndex " ) :
continue
continue
treebunddir = arg s. tmpdir
treebunddir = option s. tmpdir
if ( trees [ tree ] [ " treeprefix " ] ) :
if ( trees [ tree ] [ " treeprefix " ] ) :
treebunddir = os . path . join ( treebunddir , trees [ tree ] [ " treeprefix " ] )
treebunddir = os . path . join ( treebunddir , trees [ tree ] [ " treeprefix " ] )
if not ( os . path . isdir ( treebunddir ) ) :
if not ( os . path . isdir ( treebunddir ) ) :
@ -335,4 +340,4 @@ for tree in trees:
treebundtxt = " %s .txt " % ( treebundres [ 0 : - 4 ] )
treebundtxt = " %s .txt " % ( treebundres [ 0 : - 4 ] )
runcmd ( " iculslocs " , " -i %s -N %s -T %s -b %s " % ( outfile , dataname , tree , treebundtxt ) )
runcmd ( " iculslocs " , " -i %s -N %s -T %s -b %s " % ( outfile , dataname , tree , treebundtxt ) )
runcmd ( " genrb " , " -d %s -s %s res_index.txt " % ( treebunddir , treebunddir ) )
runcmd ( " genrb " , " -d %s -s %s res_index.txt " % ( treebunddir , treebunddir ) )
runcmd ( " icupkg " , " -s %s -a %s %s %s " % ( arg s. tmpdir , trees [ tree ] [ " treeprefix " ] , RES_INDX , outfile ) )
runcmd ( " icupkg " , " -s %s -a %s %s %s " % ( option s. tmpdir , trees [ tree ] [ " treeprefix " ] , RES_INDX , outfile ) )