src/org/sonews/storage/DrupalMessage.java
author František Kučera <franta-hg@frantovo.cz>
Wed Oct 19 17:23:53 2011 +0200 (2011-10-19)
changeset 100 08c9fb6fb017
parent 89 c60625d58158
child 102 d843b4fee5dc
permissions -rw-r--r--
Drupal: tidy – správné označování konců řádků (jen tam, kde už je nějaký text)
     1 /*
     2  *   SONEWS News Server
     3  *   see AUTHORS for the list of contributors
     4  *
     5  *   This program is free software: you can redistribute it and/or modify
     6  *   it under the terms of the GNU General Public License as published by
     7  *   the Free Software Foundation, either version 3 of the License, or
     8  *   (at your option) any later version.
     9  *
    10  *   This program is distributed in the hope that it will be useful,
    11  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
    12  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    13  *   GNU General Public License for more details.
    14  *
    15  *   You should have received a copy of the GNU General Public License
    16  *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
    17  */
    18 package org.sonews.storage;
    19 
    20 import java.io.BufferedReader;
    21 import java.io.ByteArrayOutputStream;
    22 import java.io.IOException;
    23 import java.io.InputStream;
    24 import java.io.InputStreamReader;
    25 import java.io.PrintStream;
    26 import java.io.StringReader;
    27 import java.io.StringWriter;
    28 import java.io.UnsupportedEncodingException;
    29 import java.sql.ResultSet;
    30 import java.sql.SQLException;
    31 import java.util.ArrayList;
    32 import java.util.Date;
    33 import java.util.Enumeration;
    34 import java.util.logging.Level;
    35 import java.util.logging.Logger;
    36 import javax.mail.Header;
    37 import javax.mail.MessagingException;
    38 import javax.mail.Multipart;
    39 import javax.mail.Session;
    40 import javax.mail.internet.InternetAddress;
    41 import javax.mail.internet.MimeBodyPart;
    42 import javax.mail.internet.MimeMessage;
    43 import javax.mail.internet.MimeMultipart;
    44 import javax.xml.transform.Transformer;
    45 import javax.xml.transform.TransformerFactory;
    46 import javax.xml.transform.stream.StreamResult;
    47 import javax.xml.transform.stream.StreamSource;
    48 import org.sonews.util.io.Resource;
    49 
    50 /**
    51  * This is MimeMessage which enables custom Message-ID header
    52  * (this header will not be overwritten by the default one like in MimeMessage).
    53  * 
    54  * Also add header and body separate serialization.
    55  * 
    56  * And can be deserialized from SQL ResultSet
    57  * 
    58  * @author František Kučera (frantovo.cz)
    59  */
    60 public class DrupalMessage extends MimeMessage {
    61 
    62 	private static final Logger log = Logger.getLogger(DrupalMessage.class.getName());
    63 	private static final String MESSAGE_ID_HEADER = "Message-ID";
    64 	private static final String CRLF = "\r\n";
    65 	public static final String CHARSET = "UTF-8";
    66 	private static final String XHTML_CONTENT_TYPE = "text/html; charset=" + CHARSET;
    67 	private static final String ZNAKČKA_KONCE_ŘÁDKU = "◆";
    68 	private String messageID;
    69 
    70 	/**
    71 	 * Constructs MIME message from SQL result.
    72 	 * @param rs ResultSet containing message data. No {@link ResultSet#next()} will be called, just values from current row will be read.
    73 	 * @param constructBody true if whole message should be constructed | false if we need only message headers (body will be dummy).
    74 	 */
    75 	public DrupalMessage(ResultSet rs, String myDomain, boolean constructBody) throws SQLException, UnsupportedEncodingException, MessagingException {
    76 		super(Session.getDefaultInstance(System.getProperties()));
    77 
    78 		addHeader("Message-id", constructMessageId(rs.getInt("id"), rs.getInt("group_id"), rs.getString("group_name"), myDomain));
    79 		addHeader("Newsgroups", rs.getString("group_name"));
    80 		setFrom(new InternetAddress(rs.getString("sender_email"), rs.getString("sender_name")));
    81 		setSubject(rs.getString("subject"));
    82 		setSentDate(new Date(rs.getLong("created")));
    83 
    84 		int parentID = rs.getInt("parent_id");
    85 		if (parentID > 0) {
    86 			String parentMessageID = constructMessageId(parentID, rs.getInt("group_id"), rs.getString("group_name"), myDomain);
    87 			addHeader("In-Reply-To", parentMessageID);
    88 			addHeader("References", parentMessageID);
    89 		}
    90 
    91 		if (constructBody) {
    92 			Multipart multipart = new MimeMultipart("alternative");
    93 			setContent(multipart);
    94 
    95 			/** XHTML part */
    96 			MimeBodyPart htmlPart = new MimeBodyPart();
    97 			String xhtmlText = readXhtmlText(rs);
    98 			htmlPart.setContent(xhtmlText, XHTML_CONTENT_TYPE);
    99 
   100 			/** Plain text part */
   101 			MimeBodyPart textPart = new MimeBodyPart();
   102 			String plainText = readPlainText(rs, xhtmlText);
   103 			textPart.setText(plainText);
   104 			//addHeader("Lines", String.valueOf(plainText.split("\n").length));
   105 
   106 			/**
   107 			 * Thunderbirdu záleží, v jakém pořadí části jsou 
   108 			 * (když je prostý text druhý, html se nezobrazí),
   109 			 * KNode zobrazuje HTML správně, i když je na prvním místě.
   110 			 */
   111 			multipart.addBodyPart(textPart);
   112 			multipart.addBodyPart(htmlPart);
   113 		} else {
   114 			/** empty body, just headers */
   115 			setText("");
   116 		}
   117 	}
   118 
   119 	private String readPlainText(ResultSet rs, String xhtmlText) {
   120 		try {
   121 			TransformerFactory tf = TransformerFactory.newInstance();
   122 			Transformer textTransformer = tf.newTransformer(new StreamSource(Resource.getAsStream("helpers/mimeTextPart.xsl")));
   123 
   124 			StringReader input = new StringReader(xhtmlText);
   125 			StringWriter output = new StringWriter(xhtmlText.length());
   126 			textTransformer.transform(new StreamSource(input), new StreamResult(output));
   127 
   128 			return output.toString();
   129 		} catch (Exception e) {
   130 			/**
   131 			 * TODO: lepší ošetření chyby
   132 			 */
   133 			log.log(Level.WARNING, "Error while transforming article to plain text", e);
   134 			return makeSimpleXHTML("Při transformaci příspěvku bohužel došlo k chybě.");
   135 		}
   136 	}
   137 
   138 	private String readXhtmlText(ResultSet rs) {
   139 		/**
   140 		 * TODO: 
   141 		 *		- znovupoužívat XSL transformér
   142 		 *		- používat cache, ukládat si vygenerované články
   143 		 */
   144 		try {
   145 			String inputText = makeSimpleXHTML(rs.getString("text"));
   146 
   147 			TransformerFactory tf = TransformerFactory.newInstance();
   148 			Transformer paragraphTransformer = tf.newTransformer(new StreamSource(Resource.getAsStream("helpers/mimeXhtmlPart-make-paragraphs.xsl")));
   149 
   150 			String paragraphedText;
   151 			boolean tidyWasUsed = false;
   152 			try {
   153 				StringReader input = new StringReader(inputText);
   154 				StringWriter output = new StringWriter(2 * inputText.length());
   155 				paragraphTransformer.transform(new StreamSource(input), new StreamResult(output));
   156 				paragraphedText = output.toString();
   157 			} catch (Exception e) {
   158 				log.log(Level.FINER, "HTML input was shitty – Tidy had to be called.", e);
   159 				StringReader input = new StringReader(tidyXhtml(inputText));
   160 				StringWriter output = new StringWriter(2 * inputText.length());
   161 				paragraphTransformer.transform(new StreamSource(input), new StreamResult(output));
   162 				paragraphedText = output.toString();
   163 				tidyWasUsed = true;
   164 			}
   165 
   166 			Transformer xhtmlTransformer = tf.newTransformer(new StreamSource(Resource.getAsStream("helpers/mimeXhtmlPart.xsl")));
   167 			xhtmlTransformer.setParameter("isRoot", (rs.getInt("parent_id") == 0));
   168 			xhtmlTransformer.setParameter("title", rs.getString("subject"));
   169 			xhtmlTransformer.setParameter("urlBase", rs.getString("urlBase"));
   170 			xhtmlTransformer.setParameter("wwwRead", rs.getString("wwwRead"));
   171 			xhtmlTransformer.setParameter("wwwPost", rs.getString("wwwPost"));
   172 			xhtmlTransformer.setParameter("headComment", String.format("Drupal-NNTP bridge. Transformed: %1$tc. Tidy had to be used: %2$b", new Date(), tidyWasUsed));
   173 			StringReader input = new StringReader(paragraphedText);
   174 			StringWriter output = new StringWriter(2 * paragraphedText.length());
   175 			xhtmlTransformer.transform(new StreamSource(input), new StreamResult(output));
   176 
   177 			return output.toString();
   178 		} catch (Exception e) {
   179 			/**
   180 			 * TODO: lepší ošetření chyby
   181 			 */
   182 			log.log(Level.WARNING, "Error while transforming article to XHTML", e);
   183 			return makeSimpleXHTML("<p>Při transformaci příspěvku bohužel došlo k chybě.</p>");
   184 		}
   185 	}
   186 
   187 	private static String makeSimpleXHTML(String body) {
   188 		return "<html xmlns=\"http://www.w3.org/1999/xhtml\"><body>" + body + "</body></html>";
   189 	}
   190 
   191 	/**
   192 	 * TODO: refaktorovat, přesunout
   193 	 */
   194 	private static String tidyXhtml(String inputText) throws IOException {
   195 		/*
   196 		 * Viz https://sourceforge.net/tracker/index.php?func=detail&aid=3424437&group_id=27659&atid=390966
   197 		 *
   198 		 * TODO:
   199 		 *		- použít delší zástupný řetězec, ne jen jeden znak
   200 		 *		- umísťovat ho jen tam, kde už nějaký text je (ne mezi >\s*<)
   201 		 */
   202 		inputText = označKonceŘádků(inputText);
   203 
   204 		Runtime r = Runtime.getRuntime();
   205 		Process p = r.exec(new String[]{"tidy", // http://tidy.sourceforge.net
   206 					"-asxml", // well formed XHTML
   207 					"-numeric", // číselné entity
   208 					"-utf8", // kódování
   209 					"--show-warnings", "false", // žádná varování nás nezajímají
   210 					"--show-errors", "0", // ani chyby
   211 					"--doctype", "omit", // doctype nepotřebujeme (doplníme si případně vlastní v XSLT)
   212 					"--logical-emphasis", "true", // em a strong místo i a b
   213 					"--literal-attributes", "true", // zachovat mezery a konce řádků v atributech
   214 					"--force-output", "true" // neznámé značky zahodíme, vložíme jen jejich obsah
   215 				});
   216 
   217 		PrintStream vstupProcesu = new PrintStream(p.getOutputStream());
   218 		vstupProcesu.print(inputText);
   219 		vstupProcesu.close();
   220 
   221 		String outputText = streamToString(p.getInputStream());
   222 
   223 		outputText = vraťKonceŘádků(outputText);
   224 
   225 		return outputText;
   226 	}
   227 
   228 	private static String označKonceŘádků(String text) {
   229 		text = text.replaceAll(">\\s+<", "> <");
   230 		text = text.replaceAll("\\n", ZNAKČKA_KONCE_ŘÁDKU + "\n");
   231 		return text;
   232 	}
   233 
   234 	private static String vraťKonceŘádků(String text) {
   235 		text = text.replaceAll(ZNAKČKA_KONCE_ŘÁDKU + "\\n", "\n");
   236 		text = text.replaceAll(ZNAKČKA_KONCE_ŘÁDKU, "\n");
   237 		return text;
   238 	}
   239 
   240 	/**
   241 	 * TODO: refaktorovat, přesunout
   242 	 */
   243 	private static String streamToString(InputStream proud) throws IOException {
   244 		StringBuilder výsledek = new StringBuilder();
   245 		BufferedReader buf = new BufferedReader(new InputStreamReader(proud));
   246 		while (true) {
   247 			String radek = buf.readLine();
   248 			if (radek == null) {
   249 				break;
   250 			} else {
   251 				výsledek.append(radek);
   252 				výsledek.append("\n");
   253 			}
   254 		}
   255 		return výsledek.toString();
   256 	}
   257 
   258 	private static String constructMessageId(int articleID, int groupID, String groupName, String domainName) {
   259 		StringBuilder sb = new StringBuilder();
   260 		sb.append("<");
   261 		sb.append(articleID);
   262 		sb.append("-");
   263 		sb.append(groupID);
   264 		sb.append("-");
   265 		sb.append(groupName);
   266 		sb.append("@");
   267 		sb.append(domainName);
   268 		sb.append(">");
   269 		return sb.toString();
   270 	}
   271 
   272 	@Override
   273 	public void setHeader(String name, String value) throws MessagingException {
   274 		super.setHeader(name, value);
   275 
   276 		if (MESSAGE_ID_HEADER.equalsIgnoreCase(name)) {
   277 			messageID = value;
   278 		}
   279 	}
   280 
   281 	@Override
   282 	public final void addHeader(String name, String value) throws MessagingException {
   283 		super.addHeader(name, value);
   284 
   285 		if (MESSAGE_ID_HEADER.equalsIgnoreCase(name)) {
   286 			messageID = value;
   287 		}
   288 	}
   289 
   290 	@Override
   291 	public void removeHeader(String name) throws MessagingException {
   292 		super.removeHeader(name);
   293 
   294 		if (MESSAGE_ID_HEADER.equalsIgnoreCase(name)) {
   295 			messageID = null;
   296 		}
   297 	}
   298 
   299 	public void setMessageID(String messageID) {
   300 		this.messageID = messageID;
   301 	}
   302 
   303 	@Override
   304 	protected void updateMessageID() throws MessagingException {
   305 		if (messageID == null) {
   306 			super.updateMessageID();
   307 		} else {
   308 			setHeader(MESSAGE_ID_HEADER, messageID);
   309 		}
   310 	}
   311 
   312 	/**
   313 	 * Call {@link #saveChanges()} before this method, if you want all headers including such ones like:
   314 	 * 
   315 	 * <pre>MIME-Version: 1.0
   316 	 *Content-Type: multipart/alternative;</pre>
   317 	 * 
   318 	 * @return serialized headers
   319 	 * @throws MessagingException if getAllHeaders() fails
   320 	 */
   321 	public String getHeaders() throws MessagingException {
   322 		StringBuilder sb = new StringBuilder();
   323 		for (Enumeration eh = getAllHeaderLines(); eh.hasMoreElements();) {
   324 			sb.append(eh.nextElement());
   325 			sb.append(CRLF);
   326 		}
   327 		return sb.toString();
   328 	}
   329 
   330 	public byte[] getBody() throws IOException, MessagingException {
   331 		saveChanges();
   332 
   333 		ArrayList<String> skipHeaders = new ArrayList<String>();
   334 		for (Enumeration eh = getAllHeaders(); eh.hasMoreElements();) {
   335 			Header h = (Header) eh.nextElement();
   336 			skipHeaders.add(h.getName());
   337 		}
   338 
   339 		ByteArrayOutputStream baos = new ByteArrayOutputStream(1024);
   340 		writeTo(baos, skipHeaders.toArray(new String[skipHeaders.size()]));
   341 		return baos.toByteArray();
   342 	}
   343 }