;;; po-compat.el --- basic support of PO translation files -*- coding: utf-8; -*-

;; Copyright (C) 1995-2002, 2010, 2016, 2019 Free Software Foundation, Inc.

;; Authors: François Pinard <pinard@iro.umontreal.ca>,
;;          Greg McGary <gkm@magilla.cichlid.com>,
;;          Bruno Haible <bruno@clisp.org>.
;; Keywords: i18n, files

;; This file is part of GNU gettext.

;; GNU gettext is free software; you can redistribute it and/or modify
;; it under the terms of the GNU General Public License as published by
;; the Free Software Foundation; either version 2, or (at your option)
;; any later version.

;; GNU gettext is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
;; GNU General Public License for more details.

;; You should have received a copy of the GNU General Public License
;; along with GNU Emacs; see the file COPYING.  If not, see
;; <https://www.gnu.org/licenses/>.

;;; Commentary:

;; Emacs 21.2 and newer already contain this file, under the name po.el,
;; and without portability hassles.

;; This package makes sure visiting PO files decodes them correctly,
;; according to the Charset= header in the PO file.  For more support
;; for editing PO files, see po-mode.el.

;;; Code:

;;; Emacs portability matters.

(defconst po-content-type-charset-alist
  '(; Note: Emacs 21 doesn't support all encodings, thus the missing entries.
    ("ASCII" . undecided)
    ("ANSI_X3.4-1968" . undecided)
    ("US-ASCII" . undecided)
    ("ISO-8859-1" . iso-8859-1)
    ("ISO_8859-1" . iso-8859-1)
    ("ISO-8859-2" . iso-8859-2)
    ("ISO_8859-2" . iso-8859-2)
    ("ISO-8859-3" . iso-8859-3)
    ("ISO_8859-3" . iso-8859-3)
    ("ISO-8859-4" . iso-8859-4)
    ("ISO_8859-4" . iso-8859-4)
    ("ISO-8859-5" . iso-8859-5)
    ("ISO_8859-5" . iso-8859-5)
    ;("ISO-8859-6" . ??)
    ;("ISO_8859-6" . ??)
    ("ISO-8859-7" . iso-8859-7)
    ("ISO_8859-7" . iso-8859-7)
    ("ISO-8859-8" . iso-8859-8)
    ("ISO_8859-8" . iso-8859-8)
    ("ISO-8859-9" . iso-8859-9)
    ("ISO_8859-9" . iso-8859-9)
    ;("ISO-8859-13" . ??)
    ;("ISO_8859-13" . ??)
    ;("ISO-8859-14" . ??)
    ;("ISO_8859-14" . ??)
    ("ISO-8859-15" . iso-8859-15)
    ("ISO_8859-15" . iso-8859-15)
    ("KOI8-R" . koi8-r)
    ;("KOI8-U" . ??)
    ;("KOI8-T" . ??)
    ("CP437" . cp437)
    ("CP775" . cp775)
    ("CP850" . cp850)
    ("CP852" . cp852)
    ("CP855" . cp855)
    ;("CP856" . ??)
    ("CP857" . cp857)
    ("CP861" . cp861)
    ("CP862" . cp862)
    ("CP864" . cp864)
    ("CP865" . cp865)
    ("CP866" . cp866)
    ("CP869" . cp869)
    ;("CP874" . ??)
    ;("CP922" . ??)
    ;("CP932" . ??)
    ;("CP943" . ??)
    ;("CP949" . ??)
    ;("CP950" . ??)
    ;("CP1046" . ??)
    ;("CP1124" . ??)
    ;("CP1129" . ??)
    ("CP1250" . cp1250)
    ("CP1251" . cp1251)
    ("CP1252" . iso-8859-1) ; approximation
    ("CP1253" . cp1253)
    ("CP1254" . iso-8859-9) ; approximation
    ("CP1255" . iso-8859-8) ; approximation
    ;("CP1256" . ??)
    ("CP1257" . cp1257)
    ("GB2312" . cn-gb-2312)  ; also named 'gb2312' and 'euc-cn'
    ("EUC-JP" . euc-jp)
    ("EUC-KR" . euc-kr)
    ;("EUC-TW" . ??)
    ("BIG5" . big5)
    ;("BIG5-HKSCS" . ??)
    ;("GBK" . ??)
    ;("GB18030" . ??)
    ("SHIFT_JIS" . shift_jis)
    ;("JOHAB" . ??)
    ("TIS-620" . tis-620)
    ("VISCII" . viscii)
    ;("GEORGIAN-PS" . ??)
    ("UTF-8" . utf-8)
    )
  "Alist mapping canonical charset names to Mule coding systems.
Each key is a GNU libc/libiconv canonical charset name, in upper case,
as seen in the Content-Type header of a PO file; each value is the
symbol of the coding system used to decode that charset.")

(defun po-find-charset (filename)
  "Return the charset named in the header entry of PO file FILENAME.
The value is the string following \"charset=\" on the Content-Type
line, or nil when no such line is found.

This function works on the current buffer: it searches forward from
point and appends raw chunks of FILENAME at the end of the buffer,
using the buffer size as the file offset of the next chunk.  It is
therefore meant to be called in an empty scratch buffer, and is
deliberately not an interactive command."
  (let ((charset-regexp
         "^\"Content-Type: text/plain;[ \t]*charset=\\(.*\\)\\\\n\"")
        (chunk-size 4096)
        (tail-size 1024)
        (short-read nil))
    ;; Try the first `chunk-size' bytes.  In case we cannot find the
    ;; charset value within them (the PO file might start with a long
    ;; comment), keep loading further chunks until we know for sure we
    ;; have checked the empty header entry entirely: either the first
    ;; "msgid" shows up, or a read returns fewer bytes than requested,
    ;; which means the end of the file was reached.
    (while (not (or short-read (re-search-forward "^msgid" nil t)))
      (save-excursion
        (goto-char (point-max))
        ;; Buffer positions start at 1 and file offsets at 0, hence `1-'.
        (let ((pair (insert-file-contents-literally
                     filename nil
                     (1- (point))
                     (1- (+ (point) chunk-size)))))
          (setq short-read (< (nth 1 pair) chunk-size)))))
    (cond ((re-search-forward charset-regexp nil t) (match-string 1))
          ;; The whole file is loaded and holds no charset line.
          (short-read nil)
          ;; We've found the first msgid; maybe only a part of the msgstr
          ;; value was loaded.  Load the next `tail-size' bytes; if the
          ;; charset still isn't available, give up.
          (t (save-excursion
               (goto-char (point-max))
               (insert-file-contents-literally
                filename nil
                (1- (point))
                (1- (+ (point) tail-size))))
             (if (re-search-forward charset-regexp nil t)
                 (match-string 1))))))

;;;###autoload (autoload 'po-find-file-coding-system "po-compat")

(defun po-find-file-coding-system-guts (operation filename)
  "Return a list holding the coding system for decoding PO file FILENAME.
The coding system is chosen according to the charset declared in the
header entry of FILENAME; a file declaring no charset is treated as
\"ascii\", and a charset matching no known coding system yields
`no-conversion'.  Return nil unless OPERATION is `insert-file-contents'
and FILENAME exists.
Called through `file-coding-system-alist', before the file is visited
for real."
  (and (eq operation 'insert-file-contents)
       (file-exists-p filename)
       ;; Use the standard `with-temp-buffer': this file defines no
       ;; wrapper macro of its own, so it must not depend on one.
       (with-temp-buffer
         (let* ((coding-system-for-read 'no-conversion)
                (charset (or (po-find-charset filename) "ascii"))
                (charset-upper (upcase charset))
                (charset-lower (downcase charset))
                (candidate
                 (cdr (assoc charset-upper po-content-type-charset-alist)))
                ;; Charsets missing from the table may still name an
                ;; existing coding system once converted to lower case.
                (try-symbol (or candidate (intern-soft charset-lower)))
                (try-string
                 (if try-symbol (symbol-name try-symbol) charset-lower)))
           (list (cond ((and try-symbol (coding-system-p try-symbol))
                        try-symbol)
                       ;; Before Emacs 23, cpNNN coding systems had to be
                       ;; set up on demand.  Compare the numeric major
                       ;; version: comparing version strings misorders
                       ;; three-digit major versions.
                       ((and (< emacs-major-version 23)
                             (string-match "\\`cp[1-9][0-9][0-9]?\\'"
                                           try-string)
                             (assoc (substring try-string 2)
                                    (cp-supported-codepages)))
                        (codepage-setup (substring try-string 2))
                        (intern try-string))
                       (t
                        'no-conversion)))))))

(defun po-find-file-coding-system (arg-list)
  "Return a list holding the coding system for decoding a PO file.
ARG-LIST is the list (OPERATION FILENAME ...) of arguments of the I/O
primitive being performed; its first two elements are handed to
`po-find-file-coding-system-guts', whose value is returned.
Called through `file-coding-system-alist', before the file is visited
for real."
  (po-find-file-coding-system-guts (car arg-list) (nth 1 arg-list)))

(provide 'po-compat)

;;; Testing this file:

;; For each pofile in {
;;   cs.po           ; gettext/po/cs.el, charset=ISO-8859-2
;;   cs-modified.po  ; gettext/po/cs.el, charset=ISO_8859-2
;;   de.po           ; gettext/po/de.el, charset=UTF-8, if $emacsimpl = emacs
;; } do
;;   Start $emacsimpl
;;   M-x load-file po-compat.el RET
;;   C-x C-f $pofile RET
;;   Verify charset marker in status line ('2' = ISO-8859-2, 'u' = UTF-8).

;;; po-compat.el ends here