#!/usr/bin/qsh
# Copyright (C) 2016 and later: Unicode, Inc. and others.
# License & terms of use: http://www.unicode.org/copyright.html
# Copyright (C) 2000-2011, International Business Machines
# Corporation and others. All Rights Reserved.
#
# Authors:
# Ami Fixler
# Barry Novinger
# Steven R. Loomis
# George Rhoten
# Jason Spieth
#
# Shell script to unpax ICU and convert the files to an EBCDIC codepage.
# After extracting to EBCDIC, binary files are re-extracted without the
# EBCDIC conversion, thus restoring them to original codepage.
#
# Usage:  <this script> icu-archive.tar
# Exit:   0 on completion; 1 when the archive argument is missing or
#         unreadable, or when the qsh configure script cannot be generated.

if [ -z "$QSH_VERSION" ]; then
  QSH=0
  echo "QSH not detected (QSH_VERSION not set) - just testing."
else
  QSH=1
  #echo "QSH version $QSH_VERSION"
fi
export QSH

# set this to "v" to list files as they are unpacked (default)
VERBOSE_UNPACK="v"

# Set the following variable to the list of binary file suffixes (extensions)


#****************************************************************************
#binary_suffixes='ico ICO bmp BMP jpg JPG gif GIF brk BRK'
#ICU specific binary files
#****************************************************************************
binary_suffixes='brk BRK bin BIN res RES cnv CNV dat DAT icu ICU spp SPP xml XML nrm NRM utf16be UTF16BE'
data_files='icu/source/data/brkitr/* icu/source/data/locales/* icu/source/data/coll/* icu/source/data/rbnf/* icu/source/data/mappings/* icu/source/data/misc/* icu/source/data/translit/* icu/source/data/unidata/* icu/source/test/testdata/*'

#****************************************************************************
# Function: usage
# Description: Prints out text that describes how to call this script
# Input: None
# Output: None
#****************************************************************************
usage()
{
  echo "Enter archive filename as a parameter: $0 icu-archive.tar"
}

#****************************************************************************
# Function: count_words
# Description: Prints the number of arguments it was given. Called with an
#              unquoted list variable to count its whitespace-separated
#              entries without spawning wc.
# Input: The words to count
# Output: The count, on stdout
#****************************************************************************
count_words()
{
  echo $#
}

#****************************************************************************
# Function: extract_as_819
# Description: Re-extracts the named archive members with "pax -C 819".
#              Does nothing when no members are named: pax selects every
#              member when given no pattern operands, which would undo the
#              EBCDIC conversion of the whole tree.
# Input: Archive member names; reads the global tar_file
# Output: pax's verbose listing
#****************************************************************************
extract_as_819()
{
  if [ $# -eq 0 ]; then
    return 0
  fi
  pax -C 819 -rvf "$tar_file" "$@"
}

#****************************************************************************
# Function: replace_with_819
# Description: Removes the already-extracted copies of the named files and
#              re-extracts them with "pax -C 819". Does nothing for an
#              empty list (rm would complain and pax would extract all).
# Input: Paths relative to the current directory; reads the global tar_file
# Output: pax's verbose listing
#****************************************************************************
replace_with_819()
{
  if [ $# -eq 0 ]; then
    return 0
  fi
  rm "$@"
  extract_as_819 "$@"
}

#****************************************************************************
# Function: find_binary_by_suffix
# Description: Lists the regular files under ./icu whose name ends in one of
#              the suffixes in binary_suffixes. The find predicate is built
#              as quoted positional parameters so the "*.suffix" patterns
#              reach find untouched by the shell.
# Input: None; reads the global binary_suffixes
# Output: Matching paths, one per line, on stdout
#****************************************************************************
find_binary_by_suffix()
{
  # '*.zzz' only seeds the expression so every real suffix can be appended
  # uniformly as "-o -name ...".
  set -- -name '*.zzz'
  for suffix in $binary_suffixes; do
    set -- "$@" -o -name "*.$suffix"
  done
  find icu -type f \( "$@" \) -print
}

#****************************************************************************
# first make sure we have at least one arg and it's a file we can read
#****************************************************************************

# check for no arguments
if [ $# -eq 0 ]; then
  usage
  exit 1
fi

# tar file is argument 1
tar_file=$1

# check that the file is valid
if [ ! -r "$tar_file" ]; then
  echo "$tar_file does not exist or cannot be read."
  usage
  exit 1
fi

# treat all data files as ebcdic
ebcdic_data=$data_files

#****************************************************************************
# Extract files. We do this in two passes. One pass for 819 files and a
# second pass for 37 files
#****************************************************************************
echo ""
echo "Extracting from $tar_file ..."
echo ""

# extract everything as iso-8859-1 except these directories
# ($ebcdic_data is left unquoted on purpose: each pattern must become its
# own operand; -c inverts the match.)
pax -C 819 -rc${VERBOSE_UNPACK}f "$tar_file" $ebcdic_data

# extract files while converting them to EBCDIC
echo ""
echo "Extracting files which must be in ibm-37 ..."
echo ""
pax -C 37 -r${VERBOSE_UNPACK}f "$tar_file" $ebcdic_data

#****************************************************************************
# For files we have restored as CCSID 37, check the BOM to see if they
# should be processed as 819. Also handle files with special paths. Files
# that match will be added to binary files lists. The lists will in turn
# be processed to restore files as 819.
#****************************************************************************
echo ""
echo "Determining binary files by BOM ..."
echo ""
bin_count=0
# Start from an empty list so a binary_files value inherited from the
# environment can never be handed to rm.
binary_files=""
# Process BOMs
if [ -f icu/as_is/bomlist.txt ]; then
  echo "Using icu/as_is/bomlist.txt"
  # Unquoted on purpose: one operand per listed file.
  extract_as_819 $(cat icu/as_is/bomlist.txt)
else
  for file in $(find ./icu \( -name \*.txt -print \)); do
    # First three bytes of the file as lower-case hex, single-space separated.
    # The sed pattern is TWO spaces followed by '*' (squeeze runs of spaces).
    bom8=$(head -n 1 "$file" |
      od -t x1 |
      head -n 1 |
      sed 's/  */ /g' |
      cut -f2-4 -d ' ' |
      tr 'A-Z' 'a-z')
    # Find a converted UTF-8 BOM (both the 3-digit and 2-digit byte
    # renderings are accepted).
    if [ "$bom8" = "057 08b 0ab" ] || [ "$bom8" = "57 8b ab" ]; then
      # drop the leading "./" so the name matches the archive member name
      file=${file#./}

      # Restore in batches of at most 200 names per rm/pax invocation.
      if [ $(count_words $binary_files) -lt 200 ]; then
        bin_count=$(expr $bin_count + 1)
        binary_files="$binary_files $file"
      else
        echo "Restoring binary files by BOM ($bin_count)..."
        replace_with_819 $binary_files
        echo "Determining binary files by BOM ($bin_count)..."
        binary_files="$file"
        bin_count=$(expr $bin_count + 1)
      fi
    fi
  done
  # now see if a re-extract of binary files is necessary
  if [ $(count_words $binary_files) -gt 0 ]; then
    echo "Restoring binary files ($bin_count) ..."
    replace_with_819 $binary_files
  fi
fi

echo "# Processing special paths."
# Process special paths
more_bin_files=$(find_binary_by_suffix)
# NOTE: bin_count only counts the files detected by BOM above; files found
# by suffix are not added to it.
echo "Restoring binary files by special paths ($bin_count) ..."
replace_with_819 $more_bin_files

#****************************************************************************
# Generate the qsh compatible configure script
#****************************************************************************

echo ""
echo "Generating qsh compatible configure ..."
echo ""

# Only swap in the converted script when sed succeeded; otherwise a truncated
# configureTemp would silently replace the real configure.
if sed -f icu/as_is/os400/convertConfigure.sed icu/source/configure > icu/source/configureTemp; then
  # NOTE(review): 'del' is kept exactly as in the original; presumably it is
  # available in the target qsh environment - confirm.
  del -f icu/source/configure
  mv icu/source/configureTemp icu/source/configure
  chmod 755 icu/source/configure
else
  echo "Could not generate icu/source/configure; leaving the original in place."
  rm -f icu/source/configureTemp
  exit 1
fi

echo ""
echo "$0 has completed extracting ICU from $tar_file - $bin_count binary files extracted."