• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1;;; po-compat.el --- basic support of PO translation files -*- coding: latin-1; -*-
2
3;; Copyright (C) 1995-2002, 2010, 2016, 2019 Free Software Foundation, Inc.
4
5;; Authors: François Pinard <pinard@iro.umontreal.ca>,
6;;          Greg McGary <gkm@magilla.cichlid.com>,
7;;          Bruno Haible <bruno@clisp.org>.
8;; Keywords: i18n, files
9
10;; This file is part of GNU gettext.
11
12;; GNU gettext is free software; you can redistribute it and/or modify
13;; it under the terms of the GNU General Public License as published by
14;; the Free Software Foundation; either version 2, or (at your option)
15;; any later version.
16
17;; GNU gettext is distributed in the hope that it will be useful,
18;; but WITHOUT ANY WARRANTY; without even the implied warranty of
19;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
20;; GNU General Public License for more details.
21
22;; You should have received a copy of the GNU General Public License
23;; along with GNU Emacs; see the file COPYING.  If not, see
24;; <https://www.gnu.org/licenses/>.
25
26;;; Commentary:
27
28;; Emacs 21.2 and newer already contain this file, under the name po.el,
29;; and without portability hassles.
30
31;; This package makes sure visiting PO files decodes them correctly,
32;; according to the Charset= header in the PO file.  For more support
33;; for editing PO files, see po-mode.el.
34
35;;; Code:
36
37;;; Emacs portability matters.
38
(defconst po-content-type-charset-alist
  '(;; Emacs 21 lacks a coding system for some of these charsets; such
    ;; names are kept below as commented-out placeholders.

    ;; Plain ASCII under its various names.
    ("ASCII" . undecided)
    ("ANSI_X3.4-1968" . undecided)
    ("US-ASCII" . undecided)

    ;; ISO 8859 family, hyphen and underscore spellings side by side.
    ("ISO-8859-1" . iso-8859-1)   ("ISO_8859-1" . iso-8859-1)
    ("ISO-8859-2" . iso-8859-2)   ("ISO_8859-2" . iso-8859-2)
    ("ISO-8859-3" . iso-8859-3)   ("ISO_8859-3" . iso-8859-3)
    ("ISO-8859-4" . iso-8859-4)   ("ISO_8859-4" . iso-8859-4)
    ("ISO-8859-5" . iso-8859-5)   ("ISO_8859-5" . iso-8859-5)
    ;; ("ISO-8859-6" . ??)        ("ISO_8859-6" . ??)
    ("ISO-8859-7" . iso-8859-7)   ("ISO_8859-7" . iso-8859-7)
    ("ISO-8859-8" . iso-8859-8)   ("ISO_8859-8" . iso-8859-8)
    ("ISO-8859-9" . iso-8859-9)   ("ISO_8859-9" . iso-8859-9)
    ;; ("ISO-8859-13" . ??)       ("ISO_8859-13" . ??)
    ;; ("ISO-8859-14" . ??)       ("ISO_8859-14" . ??)
    ("ISO-8859-15" . iso-8859-15) ("ISO_8859-15" . iso-8859-15)

    ;; KOI8 family.
    ("KOI8-R" . koi8-r)
    ;; ("KOI8-U" . ??)
    ;; ("KOI8-T" . ??)

    ;; DOS codepages.
    ("CP437" . cp437)
    ("CP775" . cp775)
    ("CP850" . cp850)
    ("CP852" . cp852)
    ("CP855" . cp855)
    ;; ("CP856" . ??)
    ("CP857" . cp857)
    ("CP861" . cp861)
    ("CP862" . cp862)
    ("CP864" . cp864)
    ("CP865" . cp865)
    ("CP866" . cp866)
    ("CP869" . cp869)
    ;; ("CP874" . ??)
    ;; ("CP922" . ??)
    ;; ("CP932" . ??)
    ;; ("CP943" . ??)
    ;; ("CP949" . ??)
    ;; ("CP950" . ??)
    ;; ("CP1046" . ??)
    ;; ("CP1124" . ??)
    ;; ("CP1129" . ??)

    ;; Windows codepages; some are only approximated by an ISO 8859 set.
    ("CP1250" . cp1250)
    ("CP1251" . cp1251)
    ("CP1252" . iso-8859-1)             ; approximation
    ("CP1253" . cp1253)
    ("CP1254" . iso-8859-9)             ; approximation
    ("CP1255" . iso-8859-8)             ; approximation
    ;; ("CP1256" . ??)
    ("CP1257" . cp1257)

    ;; East Asian and other multibyte or regional charsets.
    ("GB2312" . cn-gb-2312)             ; also named 'gb2312' and 'euc-cn'
    ("EUC-JP" . euc-jp)
    ("EUC-KR" . euc-kr)
    ;; ("EUC-TW" . ??)
    ("BIG5" . big5)
    ;; ("BIG5-HKSCS" . ??)
    ;; ("GBK" . ??)
    ;; ("GB18030" . ??)
    ("SHIFT_JIS" . shift_jis)
    ;; ("JOHAB" . ??)
    ("TIS-620" . tis-620)
    ("VISCII" . viscii)
    ;; ("GEORGIAN-PS" . ??)
    ("UTF-8" . utf-8))
  "Alist mapping PO Content-Type charset names to Mule coding systems.
Each element is (NAME . CODING-SYSTEM), where NAME is a GNU libc/libiconv
canonical charset name in upper case, as seen in the Content-Type header,
and CODING-SYSTEM is the symbol of the Mule coding system to use for it.")
118
(defun po-find-charset (filename)
  "Return the charset declared in the header entry of PO file FILENAME.
The value is the text following \"charset=\" on the Content-Type line
of the header entry, or nil if no such line is found.

FILENAME is read literally, in chunks, into the current buffer, and
point is used as the scan position.  The byte offsets of the chunks
are derived from the end of the buffer, so the current buffer is
expected to be empty on entry (for example a fresh temporary buffer)."
  ;; Deliberately not a command: FILENAME is mandatory and the function
  ;; inserts raw file bytes into the current buffer, so a bare
  ;; `interactive' spec could only fail or damage the user's buffer.
  (let ((charset-regexp
         "^\"Content-Type: text/plain;[ \t]*charset=\\(.*\\)\\\\n\"")
        (short-read nil))
    ;; Try the first 4096 bytes.  In case we cannot find the charset value
    ;; within the first 4096 bytes (the PO file might start with a long
    ;; comment) try the next 4096 bytes repeatedly until we'll know for sure
    ;; we've checked the empty header entry entirely.
    (while (not (or short-read (re-search-forward "^msgid" nil t)))
      (save-excursion
        (goto-char (point-max))
        ;; Buffer positions are 1-based, file offsets 0-based, hence `1-'.
        (let ((pair (insert-file-contents-literally filename nil
                                                    (1- (point))
                                                    (1- (+ (point) 4096)))))
          ;; Fewer bytes than requested means end of file was reached.
          (setq short-read (< (nth 1 pair) 4096)))))
    (cond ((re-search-forward charset-regexp nil t) (match-string 1))
          ;; The whole file has been loaded and holds no charset line.
          (short-read nil)
          ;; We've found the first msgid; maybe, only a part of the msgstr
          ;; value was loaded.  Load the next 1024 bytes; if charset still
          ;; isn't available, give up.
          (t (save-excursion
               (goto-char (point-max))
               (insert-file-contents-literally filename nil
                                               (1- (point))
                                               (1- (+ (point) 1024))))
             (if (re-search-forward charset-regexp nil t)
                 (match-string 1))))))
148
149;;;###autoload (autoload 'po-find-file-coding-system "po-compat")
150
(defun po-find-file-coding-system-guts (operation filename)
  "Return a Mule (DECODING . ENCODING) pair, according to PO file charset.
Called through `file-coding-system-alist', before the file is visited for real.

OPERATION is the I/O primitive being performed; only
`insert-file-contents' is handled.  FILENAME is the PO file to inspect.

The value is a one-element list (DECODING), that is, a pair whose
ENCODING part is nil.  DECODING is `no-conversion' when the charset
declared in the file cannot be mapped to a coding system.  Return nil
when OPERATION is anything else or FILENAME does not exist."
  (and (eq operation 'insert-file-contents)
       (file-exists-p filename)
       ;; Use the standard `with-temp-buffer': this file neither defines
       ;; nor requires a `po-with-temp-buffer' macro, so relying on it
       ;; would signal a void-function error when po-compat is loaded
       ;; on its own.
       (with-temp-buffer
         (let* ((coding-system-for-read 'no-conversion)
                ;; A file without a charset declaration is treated as ASCII.
                (charset (or (po-find-charset filename) "ascii"))
                (charset-upper (upcase charset))
                (charset-lower (downcase charset))
                ;; Table lookup uses the upper-case spelling of the name.
                (candidate
                 (cdr (assoc charset-upper po-content-type-charset-alist)))
                ;; Otherwise try an already interned symbol named like the
                ;; lower-cased charset.
                (try-symbol (or candidate (intern-soft charset-lower)))
                (try-string
                 (if try-symbol (symbol-name try-symbol) charset-lower)))
           (list (cond ((and try-symbol (coding-system-p try-symbol))
                        try-symbol)
                       ;; Only when "23" does not sort before
                       ;; `emacs-version' (presumably Emacs older than 23):
                       ;; set up a supported "cpNNN" codepage on demand.
                       ((and (not (string-lessp "23" emacs-version))
                             (string-match "\\`cp[1-9][0-9][0-9]?\\'"
                                           try-string)
                             (assoc (substring try-string 2)
                                    (cp-supported-codepages)))
                        (codepage-setup (substring try-string 2))
                        (intern try-string))
                       (t
                        'no-conversion)))))))
178
(defun po-find-file-coding-system (arg-list)
  "Return a Mule (DECODING . ENCODING) pair, according to PO file charset.
Called through `file-coding-system-alist', before the file is visited for real.
ARG-LIST holds the operation as its first element and the file name as
its second; both are passed on to `po-find-file-coding-system-guts'."
  (let ((operation (nth 0 arg-list))
        (filename (nth 1 arg-list)))
    (po-find-file-coding-system-guts operation filename)))
184
185(provide 'po-compat)
186
187;;; Testing this file:
188
189;; For each pofile in {
190;;   cs.po           ; gettext/po/cs.el, charset=ISO-8859-2
191;;   cs-modified.po  ; gettext/po/cs.el, charset=ISO_8859-2
192;;   de.po           ; gettext/po/de.el, charset=UTF-8, if $emacsimpl = emacs
193;; } do
194;;   Start $emacsimpl
195;;   M-x load-file  po-compat.el RET
196;;   C-x C-f  $pofile RET
197;;   Verify charset marker in status line ('2' = ISO-8859-2, 'u' = UTF-8).
198
199;;; po-compat.el ends here
200