#!/usr/bin/qsh # Copyright (C) 2016 and later: Unicode, Inc. and others. # License & terms of use: http://www.unicode.org/copyright.html # Copyright (C) 2000-2011, International Business Machines # Corporation and others. All Rights Reserved. # # Authors: # Ami Fixler # Barry Novinger # Steven R. Loomis # George Rhoten # Jason Spieth # # Shell script to unpax ICU and convert the files to an EBCDIC codepage. # After extracting to EBCDIC, binary files are re-extracted without the # EBCDIC conversion, thus restoring them to original codepage. if [ -z "$QSH_VERSION" ]; then QSH=0 echo "QSH not detected (QSH_VERSION not set) - just testing." else QSH=1 #echo "QSH version $QSH_VERSION" fi export QSH # set this to "v" to list files as they are unpacked (default) VERBOSE_UNPACK="v" # Set the following variable to the list of binary file suffixes (extensions) #**************************************************************************** #binary_suffixes='ico ICO bmp BMP jpg JPG gif GIF brk BRK' #ICU specific binary files #**************************************************************************** binary_suffixes='brk BRK bin BIN res RES cnv CNV dat DAT icu ICU spp SPP xml XML nrm NRM utf16be UTF16BE' data_files='icu/source/data/brkitr/* icu/source/data/locales/* icu/source/data/coll/* icu/source/data/rbnf/* icu/source/data/mappings/* icu/source/data/misc/* icu/source/data/translit/* icu/source/data/unidata/* icu/source/test/testdata/*' #**************************************************************************** # Function: usage # Description: Prints out text that describes how to call this script # Input: None # Output: None #**************************************************************************** usage() { echo "Enter archive filename as a parameter: $0 icu-archive.tar" } #**************************************************************************** # first make sure we at least one arg and it's a file we can read #**************************************************************************** # check for no arguments if [ $# -eq 0 ]; then usage exit fi # tar file is argument 1 tar_file=$1 # check that the file is valid if [ ! -r $tar_file ]; then echo "$tar_file does not exist or cannot be read." usage exit fi # treat all data files as ebcdic ebcdic_data=$data_files #**************************************************************************** # Extract files. We do this in two passes. One pass for 819 files and a # second pass for 37 files #**************************************************************************** echo "" echo "Extracting from $tar_file ..." echo "" # extract everything as iso-8859-1 except these directories pax -C 819 -rc${VERBOSE_UNPACK}f $tar_file $ebcdic_data # extract files while converting them to EBCDIC echo "" echo "Extracting files which must be in ibm-37 ..." echo "" pax -C 37 -r${VERBOSE_UNPACK}f $tar_file $ebcdic_data #**************************************************************************** # For files we have restored as CCSID 37, check the BOM to see if they # should be processed as 819. Also handle files with special paths. Files # that match will be added to binary files lists. The lists will in turn # be processed to restore files as 819. #**************************************************************************** echo "" echo "Determining binary files by BOM ..." echo "" bin_count=0 # Process BOMs if [ -f icu/as_is/bomlist.txt ]; then echo "Using icu/as_is/bomlist.txt" pax -C 819 -rvf $tar_file `cat icu/as_is/bomlist.txt` else for file in `find ./icu \( -name \*.txt -print \)`; do bom8=`head -n 1 $file|\ od -t x1|\ head -n 1|\ sed 's/ */ /g'|\ cut -f2-4 -d ' '|\ tr 'A-Z' 'a-z'`; #Find a converted UTF-8 BOM if [ "$bom8" = "057 08b 0ab" -o "$bom8" = "57 8b ab" ] then file="`echo $file | cut -d / -f2-`" if [ `echo $binary_files | wc -w` -lt 200 ] then bin_count=`expr $bin_count + 1` binary_files="$binary_files $file"; else echo "Restoring binary files by BOM ($bin_count)..." rm $binary_files; pax -C 819 -rvf $tar_file $binary_files; echo "Determining binary files by BOM ($bin_count)..." binary_files="$file"; bin_count=`expr $bin_count + 1` fi fi done # now see if a re-extract of binary files is necessary if [ `echo $binary_files | wc -w` -gt 0 ] then echo "Restoring binary files ($bin_count) ..." rm $binary_files pax -C 819 -rvf $tar_file $binary_files fi fi echo "# Processing special paths." # Process special paths more_bin_opts=$(echo $binary_suffixes | sed -e 's%[a-zA-Z0-9]*%-o -name \*.&%g') # echo "Looking for additional files: find ... $more_bin_opts" more_bin_files=$(find icu -type f \( -name '*.zzz' $more_bin_opts \) -print) echo "Restoring binary files by special paths ($bin_count) ..." rm $more_bin_files pax -C 819 -rvf $tar_file $more_bin_files #**************************************************************************** # Generate and run the configure script #**************************************************************************** echo "" echo "Generating qsh compatible configure ..." echo "" sed -f icu/as_is/os400/convertConfigure.sed icu/source/configure > icu/source/configureTemp del -f icu/source/configure mv icu/source/configureTemp icu/source/configure chmod 755 icu/source/configure echo "" echo "$0 has completed extracting ICU from $tar_file - $bin_count binary files extracted."