#!/bin/sh
# Copyright (C) 2001-2010, International Business Machines
#   Corporation and others.  All Rights Reserved.
#
# Authors:
# Ami Fixler
# Steven R. Loomis
# George Rhoten
#
# Shell script to unpax ICU and convert the files to an EBCDIC codepage.
# After extracting to EBCDIC, binary files are re-extracted without the
# EBCDIC conversion, thus restoring them to original codepage.
#
# Usage: pass the archive filename as the only parameter, e.g.
#   <this script> icu-archive.tar
#
# Environment:
#   ICU_ENABLE_ASCII_STRINGS  when set to 1, txt and ucm files are also
#                             treated as binary (kept in their original
#                             codepage).
#
# Set the following variable to the list of binary file suffixes (extensions)

#binary_suffixes='ico ICO bmp BMP jpg JPG gif GIF brk BRK'
#ICU specific binary files
binary_suffixes='brk BRK bin BIN res RES cnv CNV dat DAT icu ICU spp SPP xml XML nrm NRM'

# Print a one-line hint (on stdout) showing how to invoke this script.
# Takes no arguments; $0 is expanded so the hint shows the name the script
# was actually started with.
usage()
{
    echo "Enter archive filename as a parameter: $0 icu-archive.tar"
}
# First make sure we have at least one argument and that it names a file we
# can read; otherwise print the usage hint and stop with a failure status so
# that callers can tell nothing was extracted.
if [ $# -eq 0 ]; then
    usage
    exit 1
fi
tar_file=$1
# Quoted so that an archive path containing blanks is tested as one word.
if [ ! -r "$tar_file" ]; then
    echo "$tar_file does not exist or cannot be read."
    usage
    exit 1
fi

echo ""
echo "Extracting from $tar_file ..."
echo ""
# Extract every archive member while converting it from ISO8859-1 to
# IBM-1047 (EBCDIC). "-o setfiletag" is passed through to pax as well
# (presumably so the extracted files get codepage tags -- see the z/OS pax
# documentation). The archive name is quoted so a path with blanks stays
# one argument.
pax -rvf "$tar_file" -o to=IBM-1047,from=ISO8859-1 -o setfiletag

echo ""
echo "Determining binary files ..."
echo ""

# Start from an empty list so that a binary_files value inherited from the
# environment can never be handed to the commands that later consume it.
binary_files=

# When building in ASCII mode, text files are converted as ASCII.
# An unset or empty ICU_ENABLE_ASCII_STRINGS counts as 0, which avoids a
# "integer expression expected" complaint from the numeric test.
if [ "${ICU_ENABLE_ASCII_STRINGS:-0}" -eq 1 ]; then
    binary_suffixes="$binary_suffixes txt TXT ucm UCM"
else
	# Otherwise look at every *.txt file under ./icu (leading "./" removed)
	# and read its first three bytes as lower-case hex.
	for file in $(find ./icu \( -name \*.txt -print \) | sed -e 's/^\.\///'); do
		bom8=$(head -c 3 "$file" |
			od -t x1 |
			head -n 1 |
			sed 's/  */ /g' |
			cut -f2-4 -d ' ' |
			tr 'A-Z' 'a-z')
		#Find a converted UTF-8 BOM
		if [ "$bom8" = "57 8b ab" ]
		then
			binary_files="$binary_files $file"
		fi
	done
fi

# Succeed (status 0) when the text after the last dot of $1 is one of the
# words in $binary_suffixes, fail (status 1) otherwise.
# Taking everything after the LAST dot handles names with any number of
# dots, e.g. NormalizationTest-3.2.0.txt.
has_binary_suffix()
{
	suf=${1##*.}
	for j in $binary_suffixes
	do
		if [ "$suf" = "$j" ]
		then
			return 0
		fi
	done
	return 1
}

# Walk the archive's table of contents and remember every member whose
# suffix marks it as binary.
for i in $(pax -f "$tar_file" 2>/dev/null)
do
	case $i in
	*/) ;;		# then this entry is a directory
	*.*)		# then this entry has a dot in the filename
		if has_binary_suffix "$i"
		then
			binary_files="$binary_files $i"
		fi
		;;
	*) ;;		# then this entry does not have a dot in it
	esac
done

# now see if a re-extract of binary files is necessary
if [ ${#binary_files} -eq 0 ]; then
    echo ""
    echo "There are no binary files to restore."
else
    echo "Restoring binary files ..."
    echo ""
    # $binary_files is left unquoted on purpose: it is a blank-separated
    # list that has to be split into one argument per file.
    # Remove the converted copies, then extract the same members again
    # without any codepage conversion.
    rm $binary_files
    pax -rvf "$tar_file" $binary_files
    # Tag the files as binary for proper interaction with the _BPXK_AUTOCVT
    # environment setting
    chtag -b $binary_files
fi
echo ""
echo "$0 has completed extracting ICU from $tar_file."