src/org/sonews/storage/DrupalMessage.java
author František Kučera <franta-hg@frantovo.cz>
Wed Oct 19 17:23:53 2011 +0200 (2011-10-19)
changeset 100 08c9fb6fb017
parent 89 c60625d58158
child 102 d843b4fee5dc
permissions -rw-r--r--
Drupal: tidy – správné označování konců řádků (jen tam, kde už je nějaký text)
franta-hg@72
     1
/*
franta-hg@72
     2
 *   SONEWS News Server
franta-hg@72
     3
 *   see AUTHORS for the list of contributors
franta-hg@72
     4
 *
franta-hg@72
     5
 *   This program is free software: you can redistribute it and/or modify
franta-hg@72
     6
 *   it under the terms of the GNU General Public License as published by
franta-hg@72
     7
 *   the Free Software Foundation, either version 3 of the License, or
franta-hg@72
     8
 *   (at your option) any later version.
franta-hg@72
     9
 *
franta-hg@72
    10
 *   This program is distributed in the hope that it will be useful,
franta-hg@72
    11
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
franta-hg@72
    12
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
franta-hg@72
    13
 *   GNU General Public License for more details.
franta-hg@72
    14
 *
franta-hg@72
    15
 *   You should have received a copy of the GNU General Public License
franta-hg@72
    16
 *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
franta-hg@72
    17
 */
franta-hg@72
    18
package org.sonews.storage;
franta-hg@72
    19
franta-hg@75
    20
import java.io.BufferedReader;
franta-hg@72
    21
import java.io.ByteArrayOutputStream;
franta-hg@72
    22
import java.io.IOException;
franta-hg@75
    23
import java.io.InputStream;
franta-hg@75
    24
import java.io.InputStreamReader;
franta-hg@75
    25
import java.io.PrintStream;
franta-hg@74
    26
import java.io.StringReader;
franta-hg@74
    27
import java.io.StringWriter;
franta-hg@72
    28
import java.io.UnsupportedEncodingException;
franta-hg@72
    29
import java.sql.ResultSet;
franta-hg@72
    30
import java.sql.SQLException;
franta-hg@72
    31
import java.util.ArrayList;
franta-hg@72
    32
import java.util.Date;
franta-hg@72
    33
import java.util.Enumeration;
franta-hg@74
    34
import java.util.logging.Level;
franta-hg@74
    35
import java.util.logging.Logger;
franta-hg@72
    36
import javax.mail.Header;
franta-hg@72
    37
import javax.mail.MessagingException;
franta-hg@72
    38
import javax.mail.Multipart;
franta-hg@72
    39
import javax.mail.Session;
franta-hg@72
    40
import javax.mail.internet.InternetAddress;
franta-hg@72
    41
import javax.mail.internet.MimeBodyPart;
franta-hg@72
    42
import javax.mail.internet.MimeMessage;
franta-hg@72
    43
import javax.mail.internet.MimeMultipart;
franta-hg@74
    44
import javax.xml.transform.Transformer;
franta-hg@74
    45
import javax.xml.transform.TransformerFactory;
franta-hg@74
    46
import javax.xml.transform.stream.StreamResult;
franta-hg@74
    47
import javax.xml.transform.stream.StreamSource;
franta-hg@74
    48
import org.sonews.util.io.Resource;
franta-hg@72
    49
franta-hg@72
    50
/**
franta-hg@72
    51
 * This is MimeMessage which enables custom Message-ID header
franta-hg@72
    52
 * (this header will not be overwritten by the default one like in MimeMessage).
franta-hg@72
    53
 * 
franta-hg@72
    54
 * Also add header and body separate serialization.
franta-hg@72
    55
 * 
franta-hg@72
    56
 * And can be deserialized from SQL ResultSet
franta-hg@72
    57
 * 
franta-hg@72
    58
 * @author František Kučera (frantovo.cz)
franta-hg@72
    59
 */
franta-hg@72
    60
public class DrupalMessage extends MimeMessage {
franta-hg@72
    61
franta-hg@74
    62
	private static final Logger log = Logger.getLogger(DrupalMessage.class.getName());
franta-hg@72
    63
	private static final String MESSAGE_ID_HEADER = "Message-ID";
franta-hg@72
    64
	private static final String CRLF = "\r\n";
franta-hg@72
    65
	public static final String CHARSET = "UTF-8";
franta-hg@72
    66
	private static final String XHTML_CONTENT_TYPE = "text/html; charset=" + CHARSET;
franta-hg@100
    67
	private static final String ZNAKČKA_KONCE_ŘÁDKU = "◆";
franta-hg@72
    68
	private String messageID;
franta-hg@72
    69
franta-hg@72
    70
	/**
franta-hg@72
    71
	 * Constructs MIME message from SQL result.
franta-hg@72
    72
	 * @param rs ResultSet containing message data. No {@link ResultSet#next()} will be called, just values from current row will be read.
franta-hg@72
    73
	 * @param constructBody true if whole message should be constructed | false if we need only message headers (body will be dummy).
franta-hg@72
    74
	 */
franta-hg@72
    75
	public DrupalMessage(ResultSet rs, String myDomain, boolean constructBody) throws SQLException, UnsupportedEncodingException, MessagingException {
franta-hg@72
    76
		super(Session.getDefaultInstance(System.getProperties()));
franta-hg@72
    77
franta-hg@72
    78
		addHeader("Message-id", constructMessageId(rs.getInt("id"), rs.getInt("group_id"), rs.getString("group_name"), myDomain));
franta-hg@72
    79
		addHeader("Newsgroups", rs.getString("group_name"));
franta-hg@74
    80
		setFrom(new InternetAddress(rs.getString("sender_email"), rs.getString("sender_name")));
franta-hg@72
    81
		setSubject(rs.getString("subject"));
franta-hg@72
    82
		setSentDate(new Date(rs.getLong("created")));
franta-hg@74
    83
franta-hg@74
    84
		int parentID = rs.getInt("parent_id");
franta-hg@74
    85
		if (parentID > 0) {
franta-hg@72
    86
			String parentMessageID = constructMessageId(parentID, rs.getInt("group_id"), rs.getString("group_name"), myDomain);
franta-hg@72
    87
			addHeader("In-Reply-To", parentMessageID);
franta-hg@72
    88
			addHeader("References", parentMessageID);
franta-hg@72
    89
		}
franta-hg@72
    90
franta-hg@72
    91
		if (constructBody) {
franta-hg@72
    92
			Multipart multipart = new MimeMultipart("alternative");
franta-hg@72
    93
			setContent(multipart);
franta-hg@72
    94
franta-hg@82
    95
			/** XHTML part */
franta-hg@82
    96
			MimeBodyPart htmlPart = new MimeBodyPart();
franta-hg@82
    97
			String xhtmlText = readXhtmlText(rs);
franta-hg@82
    98
			htmlPart.setContent(xhtmlText, XHTML_CONTENT_TYPE);
franta-hg@84
    99
franta-hg@74
   100
			/** Plain text part */
franta-hg@72
   101
			MimeBodyPart textPart = new MimeBodyPart();
franta-hg@89
   102
			String plainText = readPlainText(rs, xhtmlText);
franta-hg@89
   103
			textPart.setText(plainText);
franta-hg@89
   104
			//addHeader("Lines", String.valueOf(plainText.split("\n").length));
franta-hg@87
   105
franta-hg@87
   106
			/**
franta-hg@87
   107
			 * Thunderbirdu záleží, v jakém pořadí části jsou 
franta-hg@87
   108
			 * (když je prostý text druhý, html se nezobrazí),
franta-hg@87
   109
			 * KNode zobrazuje HTML správně, i když je na prvním místě.
franta-hg@87
   110
			 */
franta-hg@72
   111
			multipart.addBodyPart(textPart);
franta-hg@87
   112
			multipart.addBodyPart(htmlPart);
franta-hg@72
   113
		} else {
franta-hg@82
   114
			/** empty body, just headers */
franta-hg@72
   115
			setText("");
franta-hg@72
   116
		}
franta-hg@72
   117
	}
franta-hg@72
   118
franta-hg@82
   119
	private String readPlainText(ResultSet rs, String xhtmlText) {
franta-hg@89
   120
		try {
franta-hg@89
   121
			TransformerFactory tf = TransformerFactory.newInstance();
franta-hg@89
   122
			Transformer textTransformer = tf.newTransformer(new StreamSource(Resource.getAsStream("helpers/mimeTextPart.xsl")));
franta-hg@89
   123
franta-hg@89
   124
			StringReader input = new StringReader(xhtmlText);
franta-hg@89
   125
			StringWriter output = new StringWriter(xhtmlText.length());
franta-hg@89
   126
			textTransformer.transform(new StreamSource(input), new StreamResult(output));
franta-hg@89
   127
franta-hg@89
   128
			return output.toString();
franta-hg@89
   129
		} catch (Exception e) {
franta-hg@89
   130
			/**
franta-hg@89
   131
			 * TODO: lepší ošetření chyby
franta-hg@89
   132
			 */
franta-hg@89
   133
			log.log(Level.WARNING, "Error while transforming article to plain text", e);
franta-hg@89
   134
			return makeSimpleXHTML("Při transformaci příspěvku bohužel došlo k chybě.");
franta-hg@89
   135
		}
franta-hg@72
   136
	}
franta-hg@72
   137
franta-hg@72
   138
	private String readXhtmlText(ResultSet rs) {
franta-hg@72
   139
		/**
franta-hg@82
   140
		 * TODO: 
franta-hg@82
   141
		 *		- znovupoužívat XSL transformér
franta-hg@82
   142
		 *		- používat cache, ukládat si vygenerované články
franta-hg@72
   143
		 */
franta-hg@74
   144
		try {
franta-hg@84
   145
			String inputText = makeSimpleXHTML(rs.getString("text"));
franta-hg@75
   146
franta-hg@82
   147
			TransformerFactory tf = TransformerFactory.newInstance();
franta-hg@82
   148
			Transformer paragraphTransformer = tf.newTransformer(new StreamSource(Resource.getAsStream("helpers/mimeXhtmlPart-make-paragraphs.xsl")));
franta-hg@75
   149
franta-hg@82
   150
			String paragraphedText;
franta-hg@82
   151
			boolean tidyWasUsed = false;
franta-hg@82
   152
			try {
franta-hg@82
   153
				StringReader input = new StringReader(inputText);
franta-hg@82
   154
				StringWriter output = new StringWriter(2 * inputText.length());
franta-hg@82
   155
				paragraphTransformer.transform(new StreamSource(input), new StreamResult(output));
franta-hg@82
   156
				paragraphedText = output.toString();
franta-hg@82
   157
			} catch (Exception e) {
franta-hg@82
   158
				log.log(Level.FINER, "HTML input was shitty – Tidy had to be called.", e);
franta-hg@82
   159
				StringReader input = new StringReader(tidyXhtml(inputText));
franta-hg@82
   160
				StringWriter output = new StringWriter(2 * inputText.length());
franta-hg@82
   161
				paragraphTransformer.transform(new StreamSource(input), new StreamResult(output));
franta-hg@82
   162
				paragraphedText = output.toString();
franta-hg@82
   163
				tidyWasUsed = true;
franta-hg@82
   164
			}
franta-hg@75
   165
franta-hg@82
   166
			Transformer xhtmlTransformer = tf.newTransformer(new StreamSource(Resource.getAsStream("helpers/mimeXhtmlPart.xsl")));
franta-hg@82
   167
			xhtmlTransformer.setParameter("isRoot", (rs.getInt("parent_id") == 0));
franta-hg@82
   168
			xhtmlTransformer.setParameter("title", rs.getString("subject"));
franta-hg@82
   169
			xhtmlTransformer.setParameter("urlBase", rs.getString("urlBase"));
franta-hg@82
   170
			xhtmlTransformer.setParameter("wwwRead", rs.getString("wwwRead"));
franta-hg@82
   171
			xhtmlTransformer.setParameter("wwwPost", rs.getString("wwwPost"));
franta-hg@82
   172
			xhtmlTransformer.setParameter("headComment", String.format("Drupal-NNTP bridge. Transformed: %1$tc. Tidy had to be used: %2$b", new Date(), tidyWasUsed));
franta-hg@82
   173
			StringReader input = new StringReader(paragraphedText);
franta-hg@82
   174
			StringWriter output = new StringWriter(2 * paragraphedText.length());
franta-hg@82
   175
			xhtmlTransformer.transform(new StreamSource(input), new StreamResult(output));
franta-hg@75
   176
franta-hg@74
   177
			return output.toString();
franta-hg@74
   178
		} catch (Exception e) {
franta-hg@74
   179
			/**
franta-hg@74
   180
			 * TODO: lepší ošetření chyby
franta-hg@74
   181
			 */
franta-hg@74
   182
			log.log(Level.WARNING, "Error while transforming article to XHTML", e);
franta-hg@84
   183
			return makeSimpleXHTML("<p>Při transformaci příspěvku bohužel došlo k chybě.</p>");
franta-hg@74
   184
		}
franta-hg@72
   185
	}
franta-hg@72
   186
franta-hg@84
   187
	private static String makeSimpleXHTML(String body) {
franta-hg@84
   188
		return "<html xmlns=\"http://www.w3.org/1999/xhtml\"><body>" + body + "</body></html>";
franta-hg@84
   189
	}
franta-hg@84
   190
franta-hg@75
   191
	/**
franta-hg@75
   192
	 * TODO: refaktorovat, přesunout
franta-hg@75
   193
	 */
franta-hg@75
   194
	private static String tidyXhtml(String inputText) throws IOException {
franta-hg@89
   195
		/*
franta-hg@89
   196
		 * Viz https://sourceforge.net/tracker/index.php?func=detail&aid=3424437&group_id=27659&atid=390966
franta-hg@89
   197
		 *
franta-hg@89
   198
		 * TODO:
franta-hg@89
   199
		 *		- použít delší zástupný řetězec, ne jen jeden znak
franta-hg@89
   200
		 *		- umísťovat ho jen tam, kde už nějaký text je (ne mezi >\s*<)
franta-hg@89
   201
		 */
franta-hg@100
   202
		inputText = označKonceŘádků(inputText);
franta-hg@82
   203
franta-hg@75
   204
		Runtime r = Runtime.getRuntime();
franta-hg@82
   205
		Process p = r.exec(new String[]{"tidy", // http://tidy.sourceforge.net
franta-hg@82
   206
					"-asxml", // well formed XHTML
franta-hg@82
   207
					"-numeric", // číselné entity
franta-hg@82
   208
					"-utf8", // kódování
franta-hg@82
   209
					"--show-warnings", "false", // žádná varování nás nezajímají
franta-hg@82
   210
					"--show-errors", "0", // ani chyby
franta-hg@82
   211
					"--doctype", "omit", // doctype nepotřebujeme (doplníme si případně vlastní v XSLT)
franta-hg@82
   212
					"--logical-emphasis", "true", // em a strong místo i a b
franta-hg@82
   213
					"--literal-attributes", "true", // zachovat mezery a konce řádků v atributech
franta-hg@82
   214
					"--force-output", "true" // neznámé značky zahodíme, vložíme jen jejich obsah
franta-hg@82
   215
				});
franta-hg@75
   216
franta-hg@75
   217
		PrintStream vstupProcesu = new PrintStream(p.getOutputStream());
franta-hg@75
   218
		vstupProcesu.print(inputText);
franta-hg@75
   219
		vstupProcesu.close();
franta-hg@75
   220
franta-hg@75
   221
		String outputText = streamToString(p.getInputStream());
franta-hg@75
   222
franta-hg@100
   223
		outputText = vraťKonceŘádků(outputText);
franta-hg@82
   224
franta-hg@75
   225
		return outputText;
franta-hg@75
   226
	}
franta-hg@75
   227
franta-hg@100
   228
	private static String označKonceŘádků(String text) {
franta-hg@100
   229
		text = text.replaceAll(">\\s+<", "> <");
franta-hg@100
   230
		text = text.replaceAll("\\n", ZNAKČKA_KONCE_ŘÁDKU + "\n");
franta-hg@100
   231
		return text;
franta-hg@100
   232
	}
franta-hg@100
   233
franta-hg@100
   234
	private static String vraťKonceŘádků(String text) {
franta-hg@100
   235
		text = text.replaceAll(ZNAKČKA_KONCE_ŘÁDKU + "\\n", "\n");
franta-hg@100
   236
		text = text.replaceAll(ZNAKČKA_KONCE_ŘÁDKU, "\n");
franta-hg@100
   237
		return text;
franta-hg@100
   238
	}
franta-hg@100
   239
franta-hg@75
   240
	/**
franta-hg@75
   241
	 * TODO: refaktorovat, přesunout
franta-hg@75
   242
	 */
franta-hg@75
   243
	private static String streamToString(InputStream proud) throws IOException {
franta-hg@75
   244
		StringBuilder výsledek = new StringBuilder();
franta-hg@75
   245
		BufferedReader buf = new BufferedReader(new InputStreamReader(proud));
franta-hg@75
   246
		while (true) {
franta-hg@75
   247
			String radek = buf.readLine();
franta-hg@75
   248
			if (radek == null) {
franta-hg@75
   249
				break;
franta-hg@75
   250
			} else {
franta-hg@75
   251
				výsledek.append(radek);
franta-hg@75
   252
				výsledek.append("\n");
franta-hg@75
   253
			}
franta-hg@75
   254
		}
franta-hg@75
   255
		return výsledek.toString();
franta-hg@75
   256
	}
franta-hg@75
   257
franta-hg@72
   258
	private static String constructMessageId(int articleID, int groupID, String groupName, String domainName) {
franta-hg@72
   259
		StringBuilder sb = new StringBuilder();
franta-hg@72
   260
		sb.append("<");
franta-hg@72
   261
		sb.append(articleID);
franta-hg@72
   262
		sb.append("-");
franta-hg@72
   263
		sb.append(groupID);
franta-hg@72
   264
		sb.append("-");
franta-hg@72
   265
		sb.append(groupName);
franta-hg@72
   266
		sb.append("@");
franta-hg@72
   267
		sb.append(domainName);
franta-hg@72
   268
		sb.append(">");
franta-hg@72
   269
		return sb.toString();
franta-hg@72
   270
	}
franta-hg@72
   271
franta-hg@72
   272
	@Override
franta-hg@72
   273
	public void setHeader(String name, String value) throws MessagingException {
franta-hg@72
   274
		super.setHeader(name, value);
franta-hg@72
   275
franta-hg@72
   276
		if (MESSAGE_ID_HEADER.equalsIgnoreCase(name)) {
franta-hg@72
   277
			messageID = value;
franta-hg@72
   278
		}
franta-hg@72
   279
	}
franta-hg@72
   280
franta-hg@72
   281
	@Override
franta-hg@72
   282
	public final void addHeader(String name, String value) throws MessagingException {
franta-hg@72
   283
		super.addHeader(name, value);
franta-hg@72
   284
franta-hg@72
   285
		if (MESSAGE_ID_HEADER.equalsIgnoreCase(name)) {
franta-hg@72
   286
			messageID = value;
franta-hg@72
   287
		}
franta-hg@72
   288
	}
franta-hg@72
   289
franta-hg@72
   290
	@Override
franta-hg@72
   291
	public void removeHeader(String name) throws MessagingException {
franta-hg@72
   292
		super.removeHeader(name);
franta-hg@72
   293
franta-hg@72
   294
		if (MESSAGE_ID_HEADER.equalsIgnoreCase(name)) {
franta-hg@72
   295
			messageID = null;
franta-hg@72
   296
		}
franta-hg@72
   297
	}
franta-hg@72
   298
franta-hg@72
   299
	public void setMessageID(String messageID) {
franta-hg@72
   300
		this.messageID = messageID;
franta-hg@72
   301
	}
franta-hg@72
   302
franta-hg@72
   303
	@Override
franta-hg@72
   304
	protected void updateMessageID() throws MessagingException {
franta-hg@72
   305
		if (messageID == null) {
franta-hg@72
   306
			super.updateMessageID();
franta-hg@72
   307
		} else {
franta-hg@72
   308
			setHeader(MESSAGE_ID_HEADER, messageID);
franta-hg@72
   309
		}
franta-hg@72
   310
	}
franta-hg@72
   311
franta-hg@72
   312
	/**
franta-hg@72
   313
	 * Call {@link #saveChanges()} before this method, if you want all headers including such ones like:
franta-hg@72
   314
	 * 
franta-hg@72
   315
	 * <pre>MIME-Version: 1.0
franta-hg@72
   316
	 *Content-Type: multipart/alternative;</pre>
franta-hg@72
   317
	 * 
franta-hg@72
   318
	 * @return serialized headers
franta-hg@72
   319
	 * @throws MessagingException if getAllHeaders() fails
franta-hg@72
   320
	 */
franta-hg@72
   321
	public String getHeaders() throws MessagingException {
franta-hg@72
   322
		StringBuilder sb = new StringBuilder();
franta-hg@72
   323
		for (Enumeration eh = getAllHeaderLines(); eh.hasMoreElements();) {
franta-hg@72
   324
			sb.append(eh.nextElement());
franta-hg@72
   325
			sb.append(CRLF);
franta-hg@72
   326
		}
franta-hg@72
   327
		return sb.toString();
franta-hg@72
   328
	}
franta-hg@72
   329
franta-hg@72
   330
	public byte[] getBody() throws IOException, MessagingException {
franta-hg@72
   331
		saveChanges();
franta-hg@72
   332
franta-hg@72
   333
		ArrayList<String> skipHeaders = new ArrayList<String>();
franta-hg@72
   334
		for (Enumeration eh = getAllHeaders(); eh.hasMoreElements();) {
franta-hg@72
   335
			Header h = (Header) eh.nextElement();
franta-hg@72
   336
			skipHeaders.add(h.getName());
franta-hg@72
   337
		}
franta-hg@72
   338
franta-hg@72
   339
		ByteArrayOutputStream baos = new ByteArrayOutputStream(1024);
franta-hg@72
   340
		writeTo(baos, skipHeaders.toArray(new String[skipHeaders.size()]));
franta-hg@72
   341
		return baos.toByteArray();
franta-hg@72
   342
	}
franta-hg@72
   343
}