• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4  **************************************************************************
5  * Copyright (C) 2005-2010, International Business Machines Corporation   *
6  * and others. All Rights Reserved.                                       *
7  **************************************************************************
8  *
9  */
10 
11 package com.ibm.icu.dev.demo.charsetdet;
12 
13 import java.awt.Font;
14 import java.awt.event.ActionEvent;
15 import java.awt.event.ActionListener;
16 import java.awt.event.KeyEvent;
17 import java.awt.event.WindowAdapter;
18 import java.awt.event.WindowEvent;
19 import java.io.BufferedInputStream;
20 import java.io.File;
21 import java.io.FileInputStream;
22 import java.io.IOException;
23 import java.io.InputStream;
24 import java.io.InputStreamReader;
25 import java.net.URL;
26 import java.nio.ByteBuffer;
27 import java.nio.charset.Charset;
28 import java.security.AccessControlException;
29 
30 import javax.swing.JFileChooser;
31 import javax.swing.JFrame;
32 import javax.swing.JMenu;
33 import javax.swing.JMenuBar;
34 import javax.swing.JMenuItem;
35 import javax.swing.JOptionPane;
36 import javax.swing.JScrollPane;
37 import javax.swing.JTextPane;
38 import javax.swing.KeyStroke;
39 
40 import com.ibm.icu.charset.CharsetICU;
41 import com.ibm.icu.dev.demo.impl.DemoApplet;
42 import com.ibm.icu.text.CharsetDetector;
43 import com.ibm.icu.text.CharsetMatch;
44 
45 /**
46  * This simple application demonstrates how to use the CharsetDetector API. It
47  * opens a file or web page, detects the encoding, and then displays it using that
48  * encoding.
49  */
50 public class DetectingViewer extends JFrame implements ActionListener
51 {
52 
53     /**
54      * For serialization
55      */
56     private static final long serialVersionUID = -2307065724464747775L;
57     private JTextPane text;
58     private JFileChooser fileChooser;
59 
60     /**
61      * @throws java.awt.HeadlessException
62      */
DetectingViewer()63     public DetectingViewer()
64     {
65         super();
66         DemoApplet.demoFrameOpened();
67 
68         try {
69             fileChooser = new JFileChooser();
70         } catch (AccessControlException ace) {
71             System.err.println("no file chooser - access control exception. Continuing without file browsing. "+ace.toString());
72             fileChooser = null; //
73         }
74 
75 //        setDefaultCloseOperation(JFrame.EXIT_ON_CLOSE);
76         setSize(800, 800);
77 
78         setJMenuBar(makeMenus());
79         text = new JTextPane();
80         text.setContentType("text/plain");
81         text.setText("");
82         text.setSize(800, 800);
83 
84         Font font = new Font("Arial Unicode MS", Font.PLAIN, 24);
85         text.setFont(font);
86 
87         JScrollPane scrollPane = new JScrollPane(text);
88 
89         getContentPane().add(scrollPane);
90         setVisible(true);
91 
92         addWindowListener(
93                 new WindowAdapter() {
94                     public void windowClosing(WindowEvent e) {
95 //                        setVisible(false);
96 //                        dispose();
97 
98                           doQuit();
99                     }
100                 } );
101 
102 
103     }
104 
actionPerformed(ActionEvent event)105     public void actionPerformed(ActionEvent event)
106     {
107         String cmd = event.getActionCommand();
108 
109         if (cmd.equals("New...")) {
110            doNew();
111         } else if (cmd.equals("Open File...")) {
112            doOpenFile();
113         } else if (cmd.equals("Open URL...")) {
114             doOpenURL();
115         } else if (cmd.equals("Quit")) {
116            doQuit();
117         }
118     }
119 
main(String[] args)120     public static void main(String[] args)
121     {
122         new DetectingViewer();
123     }
124 
errorDialog(String title, String msg)125     private void errorDialog(String title, String msg)
126     {
127         JOptionPane.showMessageDialog(this, msg, title, JOptionPane.ERROR_MESSAGE);
128     }
129 
openFile(File file)130     private BufferedInputStream openFile(File file)
131     {
132         FileInputStream fileStream = null;
133 
134         try {
135             fileStream = new FileInputStream(file);
136         } catch (Exception e) {
137             errorDialog("Error Opening File", e.getMessage());
138             return null;
139         }
140 
141         return new BufferedInputStream(fileStream);
142     }
143 
144 //    private void openFile(String directory, String filename)
145 //    {
146 //        openFile(new File(directory, filename));
147 //    }
148 
149 
openURL(String url)150     private BufferedInputStream openURL(String url)
151     {
152         InputStream s = null;
153 
154         try {
155             URL aURL = new URL(url);
156             s = aURL.openStream();
157         } catch (Exception e) {
158             errorDialog("Error Opening URL", e.getMessage());
159             return null;
160         }
161 
162         return new BufferedInputStream(s);
163     }
164 
encodingName(CharsetMatch match)165     private String encodingName(CharsetMatch match)
166     {
167         return match.getName() + " (" + match.getLanguage() + ")";
168     }
169 
setMatchMenu(CharsetMatch[] matches)170     private void setMatchMenu(CharsetMatch[] matches)
171     {
172         JMenu menu = getJMenuBar().getMenu(1);
173         JMenuItem menuItem;
174 
175         menu.removeAll();
176 
177         for (int i = 0; i < matches.length; i += 1) {
178             CharsetMatch match = matches[i];
179 
180             menuItem = new JMenuItem(encodingName(match) + " " + match.getConfidence());
181 
182             menu.add(menuItem);
183         }
184     }
185 
186     private byte[] scriptTag = {(byte) 's', (byte) 'c', (byte) 'r', (byte) 'i', (byte) 'p', (byte) 't'};
187     private byte[] styleTag  = {(byte) 's', (byte) 't', (byte) 'y', (byte) 'l', (byte) 'e'};
188     private static int BUFFER_SIZE = 100000;
189 
openTag(byte[] buffer, int offset, int length, byte[] tag)190     private boolean openTag(byte[] buffer, int offset, int length, byte[] tag)
191     {
192         int tagLen = tag.length;
193         int bufRem = length - offset;
194         int b;
195 
196         for (b = 0; b < tagLen && b < bufRem; b += 1) {
197             if (buffer[b + offset] != tag[b]) {
198                 return false;
199             }
200         }
201 
202         return b == tagLen;
203     }
204 
closedTag(byte[] buffer, int offset, int length, byte[] tag)205     private boolean closedTag(byte[] buffer, int offset, int length, byte[] tag)
206     {
207         if (buffer[offset] != (byte) '/') {
208             return false;
209         }
210 
211         return openTag(buffer, offset + 1, length, tag);
212     }
213 
filter(InputStream in)214     private byte[] filter(InputStream in)
215     {
216         byte[] buffer = new byte[BUFFER_SIZE];
217         int bytesRemaining = BUFFER_SIZE;
218         int bufLen = 0;
219 
220         in.mark(BUFFER_SIZE);
221 
222         try {
223             while (bytesRemaining > 0) {
224                 int bytesRead = in.read(buffer, bufLen, bytesRemaining);
225 
226                 if (bytesRead <= 0) {
227                     break;
228                 }
229 
230                 bufLen += bytesRead;
231                 bytesRemaining -= bytesRead;
232             }
233         } catch (Exception e) {
234             // TODO: error handling?
235             return null;
236         }
237 
238         boolean inTag = false;
239         boolean skip  = false;
240         int out = 0;
241 
242         for (int i = 0; i < bufLen; i += 1) {
243             byte b = buffer[i];
244 
245             if (b == (byte) '<') {
246                 inTag = true;
247 
248                 if (openTag(buffer, i + 1, bufLen, scriptTag) ||
249                     openTag(buffer, i + 1, bufLen, styleTag)) {
250                     skip = true;
251                 } else if (closedTag(buffer, i + 1, bufLen, scriptTag) ||
252                            closedTag(buffer, i + 1, bufLen, styleTag)) {
253                     skip = false;
254                 }
255             } else if (b == (byte) '>') {
256                 inTag = false;
257             } else if (! (inTag || skip)) {
258                 buffer[out++] = b;
259             }
260         }
261 
262         byte[] filtered = new byte[out];
263 
264         System.arraycopy(buffer, 0, filtered, 0, out);
265         return filtered;
266     }
267 
detect(byte[] bytes)268     private CharsetMatch[] detect(byte[] bytes)
269     {
270         CharsetDetector det = new CharsetDetector();
271 
272         det.setText(bytes);
273 
274         return det.detectAll();
275     }
276 
detect(BufferedInputStream inputStream)277     private CharsetMatch[] detect(BufferedInputStream inputStream)
278     {
279         CharsetDetector det    = new CharsetDetector();
280 
281         try {
282             det.setText(inputStream);
283 
284             return det.detectAll();
285         } catch (Exception e) {
286             // TODO: error message?
287             return null;
288         }
289     }
290 
show(InputStream inputStream, CharsetMatch[] matches, String title)291     private void show(InputStream inputStream, CharsetMatch[] matches, String title)
292     {
293         InputStreamReader isr;
294         char[] buffer = new char[1024];
295         int bytesRead = 0;
296 
297         if (matches == null || matches.length == 0) {
298             errorDialog("Match Error", "No matches!");
299             return;
300         }
301 
302         try {
303             StringBuffer sb = new StringBuffer();
304             String encoding = matches[0].getName();
305 
306             inputStream.reset();
307 
308             if (encoding.startsWith("UTF-32")) {
309                 byte[] bytes = new byte[1024];
310                 int offset = 0;
311                 int chBytes = 0;
312                 Charset utf32 = CharsetICU.forNameICU(encoding);
313 
314                 while ((bytesRead = inputStream.read(bytes, offset, 1024)) >= 0) {
315                     offset  = bytesRead % 4;
316                     chBytes = bytesRead - offset;
317 
318                     sb.append(utf32.decode(ByteBuffer.wrap(bytes)).toString());
319 
320                     if (offset != 0) {
321                         for (int i = 0; i < offset; i += 1) {
322                             bytes[i] = bytes[chBytes + i];
323                         }
324                     }
325                 }
326             } else {
327                 isr = new InputStreamReader(inputStream, encoding);
328 
329                 while ((bytesRead = isr.read(buffer, 0, 1024)) >= 0) {
330                     sb.append(buffer, 0, bytesRead);
331                 }
332 
333                 isr.close();
334             }
335 
336             this.setTitle(title + " - " + encodingName(matches[0]));
337 
338             setMatchMenu(matches);
339             text.setText(sb.toString());
340         } catch (IOException e) {
341             errorDialog("IO Error", e.getMessage());
342         } catch (Exception e) {
343             errorDialog("Internal Error", e.getMessage());
344         }
345     }
346 
doNew()347     private void doNew()
348     {
349         // open a new window...
350     }
351 
doOpenFile()352     private void doOpenFile()
353     {
354         int retVal = fileChooser.showOpenDialog(this);
355 
356         if (retVal == JFileChooser.APPROVE_OPTION) {
357             File file = fileChooser.getSelectedFile();
358             BufferedInputStream inputStream = openFile(file);
359 
360             if (inputStream != null) {
361                 CharsetMatch[] matches = detect(inputStream);
362 
363                 show(inputStream, matches, file.getName());
364             }
365         }
366     }
367 
doOpenURL()368     private void doOpenURL()
369     {
370         String url = (String) JOptionPane.showInputDialog(this, "URL to open:", "Open URL", JOptionPane.PLAIN_MESSAGE,
371                 null, null, null);
372 
373         if (url != null && url.length() > 0) {
374             BufferedInputStream inputStream = openURL(url);
375 
376             if (inputStream != null) {
377                 byte[] filtered = filter(inputStream);
378                 CharsetMatch[] matches = detect(filtered);
379 
380                 show(inputStream, matches, url);
381             }
382         }
383 }
384 
doQuit()385     private void doQuit()
386     {
387         DemoApplet.demoFrameClosed();
388         this.setVisible(false);
389         this.dispose();
390     }
391 
makeMenus()392     private JMenuBar makeMenus()
393     {
394         JMenu menu = new JMenu("File");
395         JMenuItem mi;
396 
397         mi = new JMenuItem("Open File...");
398         mi.setAccelerator((KeyStroke.getKeyStroke(KeyEvent.VK_O, ActionEvent.CTRL_MASK)));
399         mi.addActionListener(this);
400         menu.add(mi);
401         if(fileChooser == null) {
402             mi.setEnabled(false); // no file chooser.
403         }
404 
405         mi = new JMenuItem("Open URL...");
406         mi.setAccelerator((KeyStroke.getKeyStroke(KeyEvent.VK_U, ActionEvent.CTRL_MASK)));
407         mi.addActionListener(this);
408         menu.add(mi);
409 
410         mi = new JMenuItem("Quit");
411         mi.setAccelerator((KeyStroke.getKeyStroke(KeyEvent.VK_Q, ActionEvent.CTRL_MASK)));
412         mi.addActionListener(this);
413         menu.add(mi);
414 
415         JMenuBar mbar = new JMenuBar();
416         mbar.add(menu);
417 
418         menu = new JMenu("Detected Encodings");
419         mbar.add(menu);
420 
421         return mbar;
422     }
423 }
424