;;; eword-encode.el --- RFC 2047 based encoded-word encoder for GNU Emacs

;; Copyright (C) 1995,1996,1997,1998,1999,2000 Free Software Foundation, Inc.

;; Author: MORIOKA Tomohiko <tomo@m17n.org>
;; Keywords: encoded-word, MIME, multilingual, header, mail, news

;; This file is part of FLIM (Faithful Library about Internet Message).

;; This program is free software; you can redistribute it and/or
;; modify it under the terms of the GNU General Public License as
;; published by the Free Software Foundation; either version 2, or (at
;; your option) any later version.

;; This program is distributed in the hope that it will be useful, but
;; WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
;; General Public License for more details.

;; You should have received a copy of the GNU General Public License
;; along with GNU Emacs; see the file COPYING.  If not, write to the
;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
;; Boston, MA 02111-1307, USA.

;;; Code:

(require 'mime-def)
(require 'mel)
(require 'std11)
(require 'eword-decode)


;;; @ variables
;;;

;; User options are defined in mime-def.el.

(defvar mime-header-charset-encoding-alist
  '((us-ascii		. nil)
    (iso-8859-1		. "Q")
    (iso-8859-2		. "Q")
    (iso-8859-3		. "Q")
    (iso-8859-4		. "Q")
    (iso-8859-5		. "Q")
    (koi8-r		. "Q")
    (iso-8859-7		. "Q")
    (iso-8859-8		. "Q")
    (iso-8859-9		. "Q")
    (iso-8859-14	. "Q")
    (iso-8859-15	. "Q")
    (iso-2022-jp	. "B")
    (iso-2022-jp-3	. "B")
    (iso-2022-kr	. "B")
    (gb2312		. "B")
    (cn-gb		. "B")
    (cn-gb-2312		. "B")
    (euc-kr		. "B")
    (tis-620		. "B")
    (iso-2022-jp-2	. "B")
    (iso-2022-int-1	. "B")
    (utf-8		. "B")
    ))

(defvar mime-header-default-charset-encoding "Q")

(defvar mime-header-encode-method-alist
  '((eword-encode-address-list
     . (Reply-To
	From Sender
	Resent-Reply-To Resent-From
	Resent-Sender To Resent-To
	Cc Resent-Cc Bcc Resent-Bcc
	Dcc))
    (eword-encode-in-reply-to . (In-Reply-To))
    (eword-encode-structured-field-body . (Mime-Version User-Agent))
    (eword-encode-unstructured-field-body)))

;;; @ encoded-text encoder
;;;

(defun eword-encode-text (charset encoding string &optional mode)
  "Encode STRING as an encoded-word, and return the result.
CHARSET is a symbol to indicate MIME charset of the encoded-word.
ENCODING allows \"B\" or \"Q\".
MODE is allows `text', `comment', `phrase' or nil.  Default value is
`phrase'."
  (let ((text (encoded-text-encode-string string encoding mode)))
    (if text
	(concat "=?" (upcase (symbol-name charset)) "?"
		encoding "?" text "?=")
      )))


;;; @ charset word
;;;

(defsubst eword-encode-char-type (character)
  (if (memq character '(?  ?\t ?\n))
      nil
    (char-charset character)
    ))

(defun eword-encode-divide-into-charset-words (string)
  (let ((len (length string))
	dest)
    (while (> len 0)
      (let* ((chr (aref string 0))
             ;; (chr (sref string 0))
	     (charset (eword-encode-char-type chr))
             (i 1)
	     ;; (i (char-length chr))
	     )
	(while (and (< i len)
		    (setq chr (aref string i))
                    ;; (setq chr (sref string i))
		    (eq charset (eword-encode-char-type chr)))
	  (setq i (1+ i))
          ;; (setq i (char-next-index chr i))
	  )
	(setq dest (cons (cons charset (substring string 0 i)) dest)
	      string (substring string i)
	      len (- len i))))
    (nreverse dest)))


;;; @ word
;;;

(defun eword-encode-charset-words-to-words (charset-words)
  (let (dest)
    (while charset-words
      (let* ((charset-word (car charset-words))
	     (charset (car charset-word))
	     )
	(if charset
	    (let ((charsets (list charset))
		  (str (cdr charset-word))
		  )
	      (catch 'tag
		(while (setq charset-words (cdr charset-words))
		  (setq charset-word (car charset-words)
			charset (car charset-word))
		  (if (null charset)
		      (throw 'tag nil)
		    )
		  (or (memq charset charsets)
		      (setq charsets (cons charset charsets))
		      )
		  (setq str (concat str (cdr charset-word)))
		  ))
	      (setq dest (cons (cons charsets str) dest))
	      )
	  (setq dest (cons charset-word dest)
		charset-words (cdr charset-words)
		))))
    (nreverse dest)
    ))


;;; @ rule
;;;

(defmacro make-ew-rword (text charset encoding type)
  (` (list (, text)(, charset)(, encoding)(, type))))
(defmacro ew-rword-text (rword)
  (` (car (, rword))))
(defmacro ew-rword-charset (rword)
  (` (car (cdr (, rword)))))
(defmacro ew-rword-encoding (rword)
  (` (car (cdr (cdr (, rword))))))
(defmacro ew-rword-type (rword)
  (` (car (cdr (cdr (cdr (, rword)))))))

(defun ew-find-charset-rule (charsets)
  (if charsets
      (let* ((charset (find-mime-charset-by-charsets charsets))
	     (encoding
	      (cdr (or (assq charset mime-header-charset-encoding-alist)
		       (cons charset mime-header-default-charset-encoding)))))
	(list charset encoding))))

;; [tomo:2002-11-05] The following code is a quick-fix for emacsen
;; which is not depended on the Mule model.  We should redesign
;; `eword-encode-split-string' to avoid to depend on the Mule model.
(if (featurep 'utf-2000)
;; for CHISE Architecture
(defun tm-eword::words-to-ruled-words (wl &optional mode)
  (let (mcs)
    (mapcar (function
	     (lambda (word)
	       (setq mcs (detect-mime-charset-string (cdr word)))
	       (make-ew-rword
		(cdr word)
		mcs
		(cdr (or (assq mcs mime-header-charset-encoding-alist)
			 (cons mcs mime-header-default-charset-encoding)))
		mode)
	       ))
	    wl)))

;; for legacy Mule
(defun tm-eword::words-to-ruled-words (wl &optional mode)
  (mapcar (function
	   (lambda (word)
	     (let ((ret (ew-find-charset-rule (car word))))
	       (make-ew-rword (cdr word) (car ret)(nth 1 ret) mode)
	       )))
	  wl))
)

(defun ew-space-process (seq)
  (let (prev a ac b c cc)
    (while seq
      (setq b (car seq))
      (setq seq (cdr seq))
      (setq c (car seq))
      (setq cc (ew-rword-charset c))
      (if (and (null (ew-rword-charset b))
	       (not (eq (ew-rword-type b) 'special)))
	  (progn
	    (setq a (car prev))
	    (setq ac (ew-rword-charset a))
	    (if (and (ew-rword-encoding a)
		     (ew-rword-encoding c))
		(cond ((eq ac cc)
		       (setq prev (cons
				   (cons (concat (car a)(car b)(car c))
					 (cdr a))
				   (cdr prev)
				   ))
		       (setq seq (cdr seq))
		       )
		      (t
		       (setq prev (cons
				   (cons (concat (car a)(car b))
					 (cdr a))
				   (cdr prev)
				   ))
		       ))
	      (setq prev (cons b prev))
	      ))
	(setq prev (cons b prev))
	))
    (reverse prev)
    ))

(defun eword-encode-split-string (str &optional mode)
  (ew-space-process
   (tm-eword::words-to-ruled-words
    (eword-encode-charset-words-to-words
     (eword-encode-divide-into-charset-words str))
    mode)))


;;; @ length
;;;

(defun tm-eword::encoded-word-length (rword)
  (let ((string   (ew-rword-text     rword))
	(charset  (ew-rword-charset  rword))
	(encoding (ew-rword-encoding rword))
	ret)
    (setq ret
	  (cond ((string-equal encoding "B")
		 (setq string (encode-mime-charset-string string charset))
		 (base64-encoded-length string)
		 )
		((string-equal encoding "Q")
		 (setq string (encode-mime-charset-string string charset))
		 (Q-encoded-text-length string (ew-rword-type rword))
		 )))
    (if ret
	(cons (+ 7 (length (symbol-name charset)) ret) string)
      )))


;;; @ encode-string
;;;

(defun ew-encode-rword-1 (column rwl &optional must-output)
  (catch 'can-not-output
    (let* ((rword (car rwl))
	   (ret (tm-eword::encoded-word-length rword))
	   string len)
      (if (null ret)
	  (cond ((and (setq string (car rword))
		      (or (<= (setq len (+ (length string) column)) 76)
			  (<= column 1))
		      )
		 (setq rwl (cdr rwl))
		 )
		((memq (aref string 0) '(?  ?\t))
		 (setq string (concat "\n" string)
		       len (length string)
		       rwl (cdr rwl))
		 )
		(must-output
		 (setq string "\n "
		       len 1)
		 )
		(t
		 (throw 'can-not-output nil)
		 ))
	(cond ((and (setq len (car ret))
		    (<= (+ column len) 76)
		    )
	       (setq string
		     (eword-encode-text
		      (ew-rword-charset rword)
		      (ew-rword-encoding rword)
		      (cdr ret)
		      (ew-rword-type rword)
		      ))
	       (setq len (+ (length string) column))
	       (setq rwl (cdr rwl))
	       )
	      (t
	       (setq string (car rword))
	       (let* ((p 0) np
		      (str "") nstr)
		 (while (and (< p len)
			     (progn
			       (setq np (1+ p))
			       ;;(setq np (char-next-index (sref string p) p))
			       (setq nstr (substring string 0 np))
			       (setq ret (tm-eword::encoded-word-length
					  (cons nstr (cdr rword))
					  ))
			       (setq nstr (cdr ret))
			       (setq len (+ (car ret) column))
			       (<= len 76)
			       ))
		   (setq str nstr
			 p np))
		 (if (string-equal str "")
		     (if must-output
			 (setq string "\n "
			       len 1)
		       (throw 'can-not-output nil))
		   (setq rwl (cons (cons (substring string p) (cdr rword))
				   (cdr rwl)))
		   (setq string
			 (eword-encode-text
			  (ew-rword-charset rword)
			  (ew-rword-encoding rword)
			  str
			  (ew-rword-type rword)))
		   (setq len (+ (length string) column))
		   )
		 )))
	)
      (list string len rwl)
      )))

(defun eword-encode-rword-list (column rwl)
  (let (ret dest str ew-f pew-f folded-points)
    (while rwl
      (setq ew-f (nth 2 (car rwl)))
      (if (and pew-f ew-f)
	  (setq rwl (cons '(" ") rwl)
		pew-f nil)
	(setq pew-f ew-f)
	)
      (if (null (setq ret (ew-encode-rword-1 column rwl)))
	  (let ((i (1- (length dest)))
		c s r-dest r-column)
	    (catch 'success
	      (while (catch 'found
		       (while (>= i 0)
			 (cond ((memq (setq c (aref dest i)) '(?  ?\t))
				(if (memq i folded-points)
				    (throw 'found nil)
				  (setq folded-points (cons i folded-points))
				  (throw 'found i))
				)
			       ((eq c ?\n)
				(throw 'found nil)
				))
			 (setq i (1- i))))
		(setq s (substring dest i)
		      r-column (length s)
		      r-dest (concat (substring dest 0 i) "\n" s))
		(when (setq ret (ew-encode-rword-1 r-column rwl))
		  (setq dest r-dest
			column r-column)
		  (throw 'success t)
		  ))
	      (setq ret (ew-encode-rword-1 column rwl 'must-output))
	      )))
      (setq str (car ret))
      (setq dest (concat dest str))
      (setq column (nth 1 ret)
	    rwl (nth 2 ret))
      )
    (list dest column)
    ))


;;; @ converter
;;;

(defun eword-encode-phrase-to-rword-list (phrase)
  (let (token type dest str)
    (while phrase
      (setq token (car phrase))
      (setq type (car token))
      (cond ((eq type 'quoted-string)
	     (setq str (concat "\"" (cdr token) "\""))
	     (setq dest
		   (append dest
			   (list
			    (let ((ret (ew-find-charset-rule
					(find-charset-string str))))
			      (make-ew-rword
			       str (car ret)(nth 1 ret) 'phrase)
			      )
			    )))
	     )
	    ((eq type 'comment)
	     (setq dest
		   (append dest
			   '(("(" nil nil special))
			   (tm-eword::words-to-ruled-words
			    (eword-encode-charset-words-to-words
			     (eword-encode-divide-into-charset-words
			      (cdr token)))
			    'comment)
			   '((")" nil nil special))
			   ))
	     )
	    (t
	     (setq dest
		   (append dest
			   (tm-eword::words-to-ruled-words
			    (eword-encode-charset-words-to-words
			     (eword-encode-divide-into-charset-words
			      (cdr token))
			     ) 'phrase)))
	     ))
      (setq phrase (cdr phrase))
      )
    (ew-space-process dest)
    ))

(defun eword-encode-addr-seq-to-rword-list (seq)
  (let (dest pname)
    (while seq
      (let* ((token (car seq))
	     (name (car token))
	     )
	(cond ((eq name 'spaces)
	       (setq dest (nconc dest (list (list (cdr token) nil nil))))
	       )
	      ((eq name 'comment)
	       (setq dest
		     (nconc
		      dest
		      (list (list "(" nil nil))
		      (eword-encode-split-string (cdr token) 'comment)
		      (list (list ")" nil nil))
		      ))
	       )
	      ((eq name 'quoted-string)
	       (setq dest
		     (nconc
		      dest
		      (list
		       (list (concat "\"" (cdr token) "\"") nil nil)
		       )))
	       )
	      (t
	       (setq dest
		     (if (or (eq pname 'spaces)
			     (eq pname 'comment))
			 (nconc dest (list (list (cdr token) nil nil)))
		       (nconc (nreverse (cdr (reverse dest)))
			      ;; (butlast dest)
			      (list
			       (list (concat (car (car (last dest)))
					     (cdr token))
				     nil nil)))))
	       ))
	(setq seq (cdr seq)
	      pname name))
      )
    dest))

(defun eword-encode-phrase-route-addr-to-rword-list (phrase-route-addr)
  (if (eq (car phrase-route-addr) 'phrase-route-addr)
      (let ((phrase (nth 1 phrase-route-addr))
	    (route (nth 2 phrase-route-addr))
	    dest)
        ;; (if (eq (car (car phrase)) 'spaces)
        ;;     (setq phrase (cdr phrase))
        ;;   )
	(setq dest (eword-encode-phrase-to-rword-list phrase))
	(if dest
	    (setq dest (append dest '((" " nil nil))))
	  )
	(append
	 dest
	 (eword-encode-addr-seq-to-rword-list
	  (append '((specials . "<"))
		  route
		  '((specials . ">"))))
	 ))))

(defun eword-encode-addr-spec-to-rword-list (addr-spec)
  (if (eq (car addr-spec) 'addr-spec)
      (eword-encode-addr-seq-to-rword-list (cdr addr-spec))
    ))

(defun eword-encode-mailbox-to-rword-list (mbox)
  (let ((addr (nth 1 mbox))
	(comment (nth 2 mbox))
	dest)
    (setq dest (or (eword-encode-phrase-route-addr-to-rword-list addr)
		   (eword-encode-addr-spec-to-rword-list addr)
		   ))
    (if comment
	(setq dest
	      (append dest
		      '((" " nil nil)
			("(" nil nil))
		      (eword-encode-split-string comment 'comment)
		      (list '(")" nil nil))
		      )))
    dest))

(defsubst eword-encode-mailboxes-to-rword-list (mboxes)
  (let ((dest (eword-encode-mailbox-to-rword-list (car mboxes))))
    (if dest
	(while (setq mboxes (cdr mboxes))
	  (setq dest
		(nconc dest
		       (list '("," nil nil))
		       (eword-encode-mailbox-to-rword-list
			(car mboxes))))))
    dest))

(defsubst eword-encode-address-to-rword-list (address)
  (cond
   ((eq (car address) 'mailbox)
    (eword-encode-mailbox-to-rword-list address))
   ((eq (car address) 'group)
    (nconc
     (eword-encode-phrase-to-rword-list (nth 1 address))
     (list (list ":" nil nil))
     (eword-encode-mailboxes-to-rword-list (nth 2 address))
     (list (list ";" nil nil))))))

(defsubst eword-encode-addresses-to-rword-list (addresses)
  (let ((dest (eword-encode-address-to-rword-list (car addresses))))
    (if dest
	(while (setq addresses (cdr addresses))
	  (setq dest
		(nconc dest
		       (list '("," nil nil))
		       ;; (list '(" " nil nil))
		       (eword-encode-address-to-rword-list (car addresses))))))
    dest))

(defsubst eword-encode-msg-id-to-rword-list (msg-id)
  (list
   (list
    (concat "<"
	    (caar (eword-encode-addr-seq-to-rword-list (cdr msg-id)))
	    ">")
    nil nil)))

(defsubst eword-encode-in-reply-to-to-rword-list (in-reply-to)
  (let (dest)
    (while in-reply-to
      (setq dest
	    (append dest
		    (let ((elt (car in-reply-to)))
		      (if (eq (car elt) 'phrase)
			  (eword-encode-phrase-to-rword-list (cdr elt))
			(eword-encode-msg-id-to-rword-list elt)
			))))
      (setq in-reply-to (cdr in-reply-to)))
    dest))


;;; @ application interfaces
;;;

(defvar eword-encode-default-start-column 10
  "Default start column if it is omitted.")

(defun eword-encode-string (string &optional column mode)
  "Encode STRING as encoded-words, and return the result.
Optional argument COLUMN is start-position of the field.
Optional argument MODE allows `text', `comment', `phrase' or nil.
Default value is `phrase'."
  (car (eword-encode-rword-list
	(or column eword-encode-default-start-column)
	(eword-encode-split-string string mode))))

(defun eword-encode-address-list (string &optional column)
  "Encode header field STRING as list of address, and return the result.
Optional argument COLUMN is start-position of the field."
  (car (eword-encode-rword-list
	(or column eword-encode-default-start-column)
	(eword-encode-addresses-to-rword-list
	 (std11-parse-addresses-string string))
	)))

(defun eword-encode-in-reply-to (string &optional column)
  "Encode header field STRING as In-Reply-To field, and return the result.
Optional argument COLUMN is start-position of the field."
  (car (eword-encode-rword-list
	(or column 13)
	(eword-encode-in-reply-to-to-rword-list
	 (std11-parse-msg-ids-string string)))))

(defun eword-encode-structured-field-body (string &optional column)
  "Encode header field STRING as structured field, and return the result.
Optional argument COLUMN is start-position of the field."
  (car (eword-encode-rword-list
	(or column eword-encode-default-start-column)
	(eword-encode-addr-seq-to-rword-list (std11-lexical-analyze string))
	)))

(defun eword-encode-unstructured-field-body (string &optional column)
  "Encode header field STRING as unstructured field, and return the result.
Optional argument COLUMN is start-position of the field."
  (car (eword-encode-rword-list
	(or column eword-encode-default-start-column)
	(eword-encode-split-string string 'text))))

;;;###autoload
(defun mime-encode-field-body (field-body field-name)
  "Encode FIELD-BODY as FIELD-NAME, and return the result.
A lexical token includes non-ASCII character is encoded as MIME
encoded-word.  ASCII token is not encoded."
  (setq field-body (std11-unfold-string field-body))
  (if (string= field-body "")
      ""
    (let ((method-alist mime-header-encode-method-alist)
	  start ret)
      (if (symbolp field-name)
	  (setq start (1+ (length (symbol-name field-name))))
	(setq start (1+ (length field-name))
	      field-name (intern (capitalize field-name))))
      (while (car method-alist)
	(if (or (not (cdr (car method-alist)))
		(memq field-name
		      (cdr (car method-alist))))
	    (progn
	      (setq ret
		    (apply (caar method-alist) (list field-body start)))
	      (setq method-alist nil)))
	(setq method-alist (cdr method-alist)))
      ret)))
(defalias 'eword-encode-field-body 'mime-encode-field-body)
(make-obsolete 'eword-encode-field-body 'mime-encode-field-body)

(defun eword-in-subject-p ()
  (let ((str (std11-field-body "Subject")))
    (if (and str (string-match eword-encoded-word-regexp str))
	str)))
(make-obsolete 'eword-in-subject-p "Don't use it.")

(defsubst eword-find-field-encoding-method (field-name)
  (setq field-name (downcase field-name))
  (let ((alist mime-field-encoding-method-alist))
    (catch 'found
      (while alist
	(let* ((pair (car alist))
	       (str (car pair)))
	  (if (and (stringp str)
		   (string= field-name (downcase str)))
	      (throw 'found (cdr pair))
	    ))
	(setq alist (cdr alist)))
      (cdr (assq t mime-field-encoding-method-alist))
      )))

;;;###autoload
(defun mime-encode-header-in-buffer (&optional code-conversion)
  "Encode header fields to network representation, such as MIME encoded-word.
It refers the `mime-field-encoding-method-alist' variable."
  (interactive "*")
  (save-excursion
    (save-restriction
      (std11-narrow-to-header mail-header-separator)
      (goto-char (point-min))
      (let ((default-cs (mime-charset-to-coding-system default-mime-charset))
	    bbeg end field-name)
	(while (re-search-forward std11-field-head-regexp nil t)
	  (setq bbeg (match-end 0)
		field-name (buffer-substring-no-properties (match-beginning 0)
							   (1- bbeg))
		end (std11-field-end))
	  (and (delq 'ascii (find-charset-region bbeg end))
	       (let ((method (eword-find-field-encoding-method
			      (downcase field-name))))
		 (cond ((eq method 'mime)
			(let* ((field-body
				(buffer-substring-no-properties bbeg end))
			       (encoded-body
				(mime-encode-field-body
				 field-body field-name)))
			  (if (not encoded-body)
			      (error "Cannot encode %s:%s"
				     field-name field-body)
			    (delete-region bbeg end)
			    (insert encoded-body))))
		       (code-conversion
			(let ((cs
			       (or (mime-charset-to-coding-system
				    method)
				   default-cs)))
			  (encode-coding-region bbeg end cs)))))))))))
(defalias 'eword-encode-header 'mime-encode-header-in-buffer)
(make-obsolete 'eword-encode-header 'mime-encode-header-in-buffer)


;;; @ end
;;;

(provide 'eword-encode)

;;; eword-encode.el ends here
