3 * see AUTHORS for the list of contributors
5 * This program is free software: you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation, either version 3 of the License, or
8 * (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License
16 * along with this program. If not, see <http://www.gnu.org/licenses/>.
18 package org.sonews.storage;
20 import java.io.BufferedReader;
21 import java.io.ByteArrayInputStream;
22 import java.io.ByteArrayOutputStream;
23 import java.io.IOException;
24 import java.io.InputStream;
25 import java.io.InputStreamReader;
26 import java.io.PrintStream;
27 import java.io.StringReader;
28 import java.io.StringWriter;
29 import java.io.UnsupportedEncodingException;
30 import java.sql.ResultSet;
31 import java.sql.SQLException;
32 import java.util.ArrayList;
33 import java.util.Arrays;
34 import java.util.Date;
35 import java.util.Enumeration;
36 import java.util.logging.Level;
37 import java.util.logging.Logger;
38 import java.util.regex.Matcher;
39 import java.util.regex.Pattern;
40 import javax.mail.Header;
41 import javax.mail.MessagingException;
42 import javax.mail.Multipart;
43 import javax.mail.Session;
44 import javax.mail.internet.InternetAddress;
45 import javax.mail.internet.MimeBodyPart;
46 import javax.mail.internet.MimeMessage;
47 import javax.mail.internet.MimeMultipart;
48 import javax.xml.parsers.DocumentBuilder;
49 import javax.xml.parsers.DocumentBuilderFactory;
50 import javax.xml.parsers.ParserConfigurationException;
51 import javax.xml.transform.Transformer;
52 import javax.xml.transform.TransformerException;
53 import javax.xml.transform.TransformerFactory;
54 import javax.xml.transform.dom.DOMSource;
55 import javax.xml.transform.stream.StreamResult;
56 import javax.xml.transform.stream.StreamSource;
57 import org.sonews.daemon.NNTPConnection;
58 import org.sonews.util.io.Resource;
59 import org.w3c.dom.Document;
60 import org.xml.sax.SAXException;
/**
 * A MimeMessage that supports a custom Message-ID header
 * (the header is not overwritten by the default one, as it would be in a plain MimeMessage).
 *
 * It also adds separate serialization of headers and body,
 *
 * and it can be deserialized from an SQL ResultSet or from an Article.
 *
 * @author František Kučera (frantovo.cz)
 */
72 public class DrupalMessage extends MimeMessage {
75 * If body of message posted by user through NNTP starts with this text,
76 * it will be treated as formated text in Markdown syntax.
78 private static final Logger log = Logger.getLogger(DrupalMessage.class.getName());
79 private static final String MESSAGE_ID_HEADER = "Message-ID";
80 private static final String CRLF = "\r\n";
81 public static final String CHARSET = "UTF-8";
82 private static final String XHTML_CONTENT_TYPE = "text/html; charset=" + CHARSET;
83 private static final String ZNAKČKA_KONCE_ŘÁDKU = "◆";
84 private static final String MARKDOWN_HEADER = "#!markdown\r\n";
85 private static final String SIGNATURE_BLOCK = "\r\n-- \r\n";
86 private String messageID;
87 private Long parentID;
89 private TransformerFactory transformerFactory;
90 private DocumentBuilderFactory documentBuilderFactory;
93 * Initializes XML factories (Transformer, DocumentBuilder).
95 private void initFactories() {
96 transformerFactory = TransformerFactory.newInstance();
97 documentBuilderFactory = DocumentBuilderFactory.newInstance();
99 * Komentáře nás nepotřebujeme
100 * (a museli bychom je brát v úvahu při dělení odstavců:
101 * v současné verzi XSLT odstavcovače by nám případný komentář
102 * rozdělil text na dva odstavce, přestože to má být odstavec jede).
104 documentBuilderFactory.setIgnoringComments(true);
// Builds the message from the current row of an SQL result: Message-ID, Newsgroups,
// From, Subject and sent-date headers, reply headers derived from parent_id, and a
// multipart/alternative body holding a plain text part and an XHTML part.
// NOTE(review): this block is extraction-damaged. Every line carries its original file
// line number as a prefix and several short lines (braces, if/else heads, comment
// delimiters) are missing, so it does not compile as shown. Restore it from version control.
108 * Constructs MIME message from SQL result.
109 * @param rs ResultSet containing message data. No {@link ResultSet#next()} will be called, just values from current row will be read.
110 * @param constructBody true if whole message should be constructed | false if we need only message headers (body will be dummy).
112 public DrupalMessage(ResultSet rs, String myDomain, boolean constructBody) throws SQLException, UnsupportedEncodingException, MessagingException, TransformerException, IOException, ParserConfigurationException, SAXException {
113 super(Session.getDefaultInstance(System.getProperties()));
// NOTE(review): original lines 114-115 are missing. The XML factories used further down
// must be initialized before the body is built; presumably initFactories() was called here. Confirm.
116 groupID = rs.getLong("group_id");
// The row id is read with getInt and widened to long by constructMessageId.
117 addHeader("Message-id", constructMessageId(rs.getInt("id"), groupID, rs.getString("group_name"), myDomain));
118 addHeader("Newsgroups", rs.getString("group_name"));
119 setFrom(new InternetAddress(rs.getString("sender_email"), rs.getString("sender_name")));
120 setSubject(rs.getString("subject"));
// NOTE(review): "created" is passed to Date unchanged, i.e. treated as milliseconds. Confirm the column's unit.
121 setSentDate(new Date(rs.getLong("created")));
123 parentID = rs.getLong("parent_id");
// NOTE(review): original line 124 is missing; presumably a guard that adds the reply
// headers only when the row has a parent opened here. Confirm.
125 String parentMessageID = constructMessageId(parentID, rs.getInt("group_id"), rs.getString("group_name"), myDomain);
126 addHeader("In-Reply-To", parentMessageID);
127 addHeader("References", parentMessageID);
// NOTE(review): original line 130 is missing; presumably the branch taken when
// constructBody is true starts here. Confirm.
131 Multipart multipart = new MimeMultipart("alternative");
132 setContent(multipart);
// XHTML part: the stored text is transformed to a complete XHTML document.
135 MimeBodyPart htmlPart = new MimeBodyPart();
136 String xhtmlText = readXhtmlText(
137 rs.getString("text"),
138 rs.getString("subject"),
139 rs.getInt("parent_id"),
140 rs.getString("urlBase"),
141 rs.getString("wwwRead"),
142 rs.getString("wwwPost"));
143 htmlPart.setContent(xhtmlText, XHTML_CONTENT_TYPE);
// The plain text part is derived from the XHTML produced above.
145 /** Plain text part */
146 MimeBodyPart textPart = new MimeBodyPart();
147 String plainText = formatedToPlainText(xhtmlText);
148 textPart.setText(plainText);
149 //addHeader("Lines", String.valueOf(plainText.split("\n").length));
// Translation of the note below: Thunderbird cares about the order of the parts
// (when the plain text part comes second, the HTML is not displayed);
// KNode displays HTML correctly even when it comes first.
152 * Thunderbirdu záleží, v jakém pořadí části jsou
153 * (když je prostý text druhý, html se nezobrazí),
154 * KNode zobrazuje HTML správně, i když je na prvním místě.
156 multipart.addBodyPart(textPart);
157 multipart.addBodyPart(htmlPart);
// NOTE(review): the else head (original line 158) and the statement that sets the dummy
// body for the headers-only case (original line 160) are missing from this view.
159 /** empty body, just headers */
// Builds the message from an article posted through NNTP and derives parentID and
// groupID from the Message-ID of the article it replies to.
// NOTE(review): this block is extraction-damaged. Every line carries its original file
// line number as a prefix and several short lines (braces, if/else heads) are missing,
// so it does not compile as shown. Restore it from version control.
165 * Constructs MIME message from article posted by user.
166 * @param article article that came through NNTP.
167 * @throws MessagingException
169 public DrupalMessage(Article article) throws MessagingException {
170 super(Session.getDefaultInstance(System.getProperties()), serializeArticle(article));
// NOTE(review): original lines 171-172 are missing; presumably initFactories() was called here. Confirm.
173 String[] replyToHeaders = getHeader("In-Reply-To");
174 String[] referencesHeaders = getHeader("References");
175 String parentMessageID;
// A single In-Reply-To header wins; otherwise a single References header is examined.
176 if (replyToHeaders != null && replyToHeaders.length == 1) {
177 parentMessageID = replyToHeaders[0];
178 } else if (referencesHeaders != null && referencesHeaders.length == 1) {
// Group 2 of this pattern is the last <...> token of the header value.
179 Pattern p = Pattern.compile("(\\s*<.*>)*\\s*(<.*>)");
180 Matcher m = p.matcher(referencesHeaders[0]);
// NOTE(review): the check that runs the matcher (original line 182) is missing from this
// view; m.group(2) is only valid after a successful match operation.
183 parentMessageID = m.group(2);
185 throw new MessagingException("Message posted by user had invalid References header: " + referencesHeaders[0]);
// NOTE(review): the message below says "Reply-To" where In-Reply-To is meant and misspells "References".
188 throw new MessagingException("Message posted by user must have exactly one In-Reply-To header. Reply-To headers: " + Arrays.toString(replyToHeaders) + " Referemces headers: " + Arrays.toString(referencesHeaders));
// Both parse methods are documented to yield null for a Message-ID that is not in this
// server's <articleID-groupID-groupName@domain> format.
191 parentID = parseArticleID(parentMessageID);
192 groupID = parseGroupID(parentMessageID);
195 private static InputStream serializeArticle(Article a) {
196 byte articleHeaders[] = a.getHeaderSource().getBytes();
197 byte delimiter[] = (NNTPConnection.NEWLINE + NNTPConnection.NEWLINE).getBytes();
198 byte body[] = a.getBody();
200 byte message[] = new byte[articleHeaders.length + delimiter.length + body.length];
202 System.arraycopy(articleHeaders, 0, message, 0, articleHeaders.length);
203 System.arraycopy(delimiter, 0, message, articleHeaders.length, delimiter.length);
204 System.arraycopy(body, 0, message, articleHeaders.length + delimiter.length, body.length);
206 return new ByteArrayInputStream(message);
210 * @param xhtmlText well-formed XHTML
211 * @return plain text representation of this formated text
213 private String formatedToPlainText(String xhtmlText) {
215 Transformer textTransformer = transformerFactory.newTransformer(new StreamSource(Resource.getAsStream("helpers/mimeTextPart.xsl")));
217 StringReader input = new StringReader(xhtmlText);
218 StringWriter output = new StringWriter(xhtmlText.length());
219 textTransformer.transform(new StreamSource(input), new StreamResult(output));
221 return output.toString();
222 } catch (Exception e) {
224 * TODO: lepší ošetření chyby
226 log.log(Level.WARNING, "Error while transforming article to plain text", e);
227 return "Při transformaci příspěvku bohužel došlo k chybě.";
231 private DOMSource readDOM(String xml) throws ParserConfigurationException, SAXException, IOException {
232 DocumentBuilder db = documentBuilderFactory.newDocumentBuilder();
233 Document d = db.parse(new ByteArrayInputStream(xml.getBytes("UTF-8")));
234 return new DOMSource(d);
237 private String readXhtmlText(String sourceText, String subject, long parentId, String urlBase, String wwwRead, String wwwPost) throws TransformerException, IOException, ParserConfigurationException, SAXException {
240 * - znovupoužívat XSL transformér (nejen v instanci)
241 * - používat cache, ukládat si vygenerované články
243 String wrappedText = makeSimpleXHTML(sourceText);
245 Transformer paragraphTransformer = transformerFactory.newTransformer(new StreamSource(Resource.getAsStream("helpers/mimeXhtmlPart-make-paragraphs.xsl")));
246 String paragraphedText;
247 boolean tidyWasUsed = false;
249 StringWriter output = new StringWriter(2 * wrappedText.length());
250 paragraphTransformer.transform(readDOM(wrappedText), new StreamResult(output));
251 paragraphedText = output.toString();
252 } catch (Exception e) {
253 log.log(Level.FINER, "HTML input was shitty – Tidy had to be called.", e);
254 StringWriter output = new StringWriter(2 * wrappedText.length());
255 paragraphTransformer.transform(readDOM(tidyXhtml(wrappedText)), new StreamResult(output));
256 paragraphedText = output.toString();
260 Transformer xhtmlTransformer = transformerFactory.newTransformer(new StreamSource(Resource.getAsStream("helpers/mimeXhtmlPart.xsl")));
261 xhtmlTransformer.setParameter("isRoot", (parentId == 0));
262 xhtmlTransformer.setParameter("title", subject);
263 xhtmlTransformer.setParameter("urlBase", urlBase);
264 xhtmlTransformer.setParameter("wwwRead", wwwRead);
265 xhtmlTransformer.setParameter("wwwPost", wwwPost);
266 xhtmlTransformer.setParameter("headComment", String.format("Drupal-NNTP bridge. Transformed: %1$tc. Tidy had to be used: %2$b", new Date(), tidyWasUsed));
267 StringReader paragraphedReader = new StringReader(paragraphedText);
268 StringWriter xhtmlWriter = new StringWriter(2 * paragraphedText.length());
269 xhtmlTransformer.transform(new StreamSource(paragraphedReader), new StreamResult(xhtmlWriter));
271 return xhtmlWriter.toString();
275 * Converts markdown to XHTML.
276 * @param markdown text in Markdown syntax
277 * @return XHTML document (with html/body elements)
278 * @throws StorageBackendException when markdown proces returned any errors
279 * (other exceptions are thrown when afterwards XHTML validation fails).
281 private String readXhtmlTextMarkdown(String markdown) throws TransformerException, IOException, ParserConfigurationException, SAXException, StorageBackendException {
282 Runtime r = Runtime.getRuntime();
283 Process p = r.exec(new String[]{"sudo", "-u", "markdown", "/usr/bin/markdown"});
285 PrintStream processInput = new PrintStream(p.getOutputStream());
286 processInput.print(markdown);
287 processInput.close();
289 String errors = streamToString(p.getErrorStream());
290 String htmlFragment = streamToString(p.getInputStream());
292 if (errors.length() == 0) {
293 String htmlDocument = makeSimpleXHTML(htmlFragment);
294 String xhtmlDocument = readXhtmlText(htmlDocument, null, -1, null, null, null);
295 return xhtmlDocument;
297 throw new StorageBackendException("Error while transforming Markdown to XHTML: " + errors);
302 * Does not parse XML works just with text.
303 * @param body XHTML fragment that should be put between <body> and </body>
304 * @return simple XHTML document (body wrapped in html and body tags)
306 private static String makeSimpleXHTML(String body) {
307 return "<html xmlns=\"http://www.w3.org/1999/xhtml\"><body>" + body + "</body></html>";
311 * Does not parse XML works just with text.
312 * @param xhtml whole XHTML page
313 * @return content between <body> and </body> tags.
315 private static String makeFragmentXHTML(String xhtml) {
316 final String startTag = "<body>";
317 final String endTag = "</body>";
319 int start = xhtml.indexOf(startTag) + startTag.length();
320 int end = xhtml.lastIndexOf(endTag);
322 return xhtml.substring(start, end);
// Repairs markup by piping the text through the external "tidy" program and returns what
// the program wrote to its standard output. Line ends are marked before the call and
// put back afterwards (see označKonceŘádků / vraťKonceŘádků).
// NOTE(review): this block is extraction-damaged. Every line carries its original file
// line number as a prefix and several short lines are missing, including one command
// line argument (original line 343) and the final return. Restore it from version control.
// Translation of the note below: TODO: refactor, move elsewhere.
326 * TODO: refaktorovat, přesunout
328 private static String tidyXhtml(String inputText) throws IOException {
// Translation of the note below: see the linked Tidy tracker item. Planned improvements:
// use a longer placeholder string, not just a single character, and put it only where
// some text already is (not between >\s*<).
330 * Viz https://sourceforge.net/tracker/index.php?func=detail&aid=3424437&group_id=27659&atid=390966
333 * - použít delší zástupný řetězec, ne jen jeden znak
334 * - umísťovat ho jen tam, kde už nějaký text je (ne mezi >\s*<)
336 inputText = označKonceŘádků(inputText);
338 Runtime r = Runtime.getRuntime();
339 // TODO: run through sudo like Markdown
340 Process p = r.exec(new String[]{"tidy", // http://tidy.sourceforge.net
341 "-asxml", // well formed XHTML
342 "-numeric", // numeric entities
// NOTE(review): original line 343 (one more argument) is missing from this view.
344 "--show-warnings", "false", // we are not interested in any warnings
345 "--show-errors", "0", // nor in errors
346 "--doctype", "omit", // we do not need a doctype (we add our own in XSLT if needed)
347 "--logical-emphasis", "true", // em and strong instead of i and b
348 "--literal-attributes", "true", // preserve whitespace and line ends in attributes
349 "--force-output", "true" // unknown tags are dropped, only their content is kept
// The text is written with a PrintStream built without an explicit charset.
352 PrintStream vstupProcesu = new PrintStream(p.getOutputStream());
353 vstupProcesu.print(inputText);
354 vstupProcesu.close();
// Only standard output is read here; the error stream of the process is not consumed.
356 String outputText = streamToString(p.getInputStream());
358 outputText = vraťKonceŘádků(outputText);
363 private static String označKonceŘádků(String text) {
364 text = text.replaceAll(">\\s+<", "> <");
365 text = text.replaceAll("\\n", ZNAKČKA_KONCE_ŘÁDKU + "\n");
369 private static String vraťKonceŘádků(String text) {
370 text = text.replaceAll(ZNAKČKA_KONCE_ŘÁDKU + "\\n", "\n");
371 text = text.replaceAll(ZNAKČKA_KONCE_ŘÁDKU, "\n");
376 * TODO: refaktorovat, přesunout
378 private static String streamToString(InputStream proud) throws IOException {
379 StringBuilder výsledek = new StringBuilder();
380 BufferedReader buf = new BufferedReader(new InputStreamReader(proud));
382 String radek = buf.readLine();
386 výsledek.append(radek);
387 výsledek.append("\n");
390 return výsledek.toString();
393 public static String constructMessageId(long articleID, long groupID, String groupName, String domainName) {
394 StringBuilder sb = new StringBuilder();
396 sb.append(articleID);
400 sb.append(groupName);
402 sb.append(domainName);
404 return sb.toString();
408 * @return article ID of parent of this message | or null, if this is root article and not reply to another one
410 public Long getParentID() {
415 * @return group ID of this message | or null, if this message is not reply to any other one – which is wrong because we have to know the group
417 public Long getGroupID() {
423 * @param messageID <{0}-{1}-{2}@domain.tld> where {0} is nntp_id and {1} is group_id and {2} is group_name
424 * @return array where [0] = nntp_id and [1] = group_id and [2] = group_name or returns null if messageID is invalid
426 private static String[] parseMessageID(String messageID) {
427 if (messageID.matches("<[0-9]+\\-[0-9]+\\-[a-z0-9\\.]+@.+>")) {
428 return messageID.substring(1).split("@")[0].split("\\-");
434 public static Long parseArticleID(String messageID) {
435 String[] localPart = parseMessageID(messageID);
436 if (localPart == null) {
439 return Long.parseLong(localPart[0]);
443 public static Long parseGroupID(String messageID) {
444 String[] localPart = parseMessageID(messageID);
445 if (localPart == null) {
448 return Long.parseLong(localPart[1]);
450 // parseGroupName() will be same as this method, just with:
451 // return localPart[2];
// Sets the header on the underlying MimeMessage and then checks, case-insensitively,
// whether the header being set is Message-ID.
// NOTE(review): extraction-damaged. Line-number prefixes are embedded, and the body of
// the if (original line 460) and the closing braces are missing. Presumably the body
// records value as the custom message ID; confirm against version control.
456 public void setHeader(String name, String value) throws MessagingException {
457 super.setHeader(name, value);
459 if (MESSAGE_ID_HEADER.equalsIgnoreCase(name)) {
// Adds the header on the underlying MimeMessage and then checks, case-insensitively,
// whether the header being added is Message-ID. The method is final and is called from
// the ResultSet constructor.
// NOTE(review): extraction-damaged. Line-number prefixes are embedded, and the body of
// the if (original line 469) and the closing braces are missing. Presumably the body
// records value as the custom message ID; confirm against version control.
465 public final void addHeader(String name, String value) throws MessagingException {
466 super.addHeader(name, value);
468 if (MESSAGE_ID_HEADER.equalsIgnoreCase(name)) {
// Removes the header from the underlying MimeMessage and then checks, case-insensitively,
// whether the removed header is Message-ID.
// NOTE(review): extraction-damaged. Line-number prefixes are embedded, and the body of
// the if (original line 478) and the closing braces are missing. Presumably the body
// clears the custom message ID; confirm against version control.
474 public void removeHeader(String name) throws MessagingException {
475 super.removeHeader(name);
477 if (MESSAGE_ID_HEADER.equalsIgnoreCase(name)) {
482 public void setMessageID(String messageID) {
483 this.messageID = messageID;
487 protected void updateMessageID() throws MessagingException {
488 if (messageID == null) {
489 super.updateMessageID();
491 setHeader(MESSAGE_ID_HEADER, messageID);
496 * Call {@link #saveChanges()} before this method, if you want all headers including such ones like:
498 * <pre>MIME-Version: 1.0
499 *Content-Type: multipart/alternative;</pre>
501 * @return serialized headers
502 * @throws MessagingException if getAllHeaders() fails
504 public String getHeaders() throws MessagingException {
505 StringBuilder sb = new StringBuilder();
506 for (Enumeration eh = getAllHeaderLines(); eh.hasMoreElements();) {
507 sb.append(eh.nextElement());
510 return sb.toString();
// Serializes the message without its headers: the names of all current headers are
// collected and passed to writeTo() as the list of headers to skip.
// NOTE(review): extraction-damaged. Line-number prefixes are embedded, the closing
// braces are missing and so are original lines 514-515, which may hold a statement
// (not only a comment). Confirm against version control.
513 public byte[] getBody() throws IOException, MessagingException {
516 ArrayList<String> skipHeaders = new ArrayList<String>();
517 for (Enumeration eh = getAllHeaders(); eh.hasMoreElements();) {
518 Header h = (Header) eh.nextElement();
519 skipHeaders.add(h.getName());
// 1024 is only the initial buffer capacity.
522 ByteArrayOutputStream baos = new ByteArrayOutputStream(1024);
523 writeTo(baos, skipHeaders.toArray(new String[skipHeaders.size()]));
524 return baos.toByteArray();
// Converts the text/plain content of the message to XHTML and returns only what lies
// between the body tags. Content starting with MARKDOWN_HEADER is converted through
// readXhtmlTextMarkdown() (with the marker cut off), other text through readXhtmlText().
// Any exception is wrapped in a StorageBackendException.
// NOTE(review): extraction-damaged. Line-number prefixes are embedded, and the try/else
// lines, the declaration of xhtml and the whole argument list of the readXhtmlText( call
// (original lines 551-557) are missing. Restore it from version control.
528 * Transforms message content to valid XHTML and strips html and body tags.
529 * When receiving message from user through NNTP
530 * this method is used to get text that should be saved into databse.
531 * @return XHTML fragment – content between <body> and </body> tags.
533 public String getBodyXhtmlFragment() throws StorageBackendException {
// Translation of the notes below: do this already in the constructor, so that everything
// is the same whether the message was received from an NNTP user or loaded from the DB;
// support messages sent directly as HTML and as multipart; cut off the signature (SIGNATURE_BLOCK).
536 * - tohle dělat už v konstruktoru a pak už mít všechno stejné, ať už jde o zprávu přijatou od NNTP uživatele nebo načtenou z DB
537 * - podporovat i zprávy přímo v HTML a multipart.
538 * - ořezávat podpis (SIGNATURE_BLOCK)
541 Object c = getContent();
542 if (isMimeType("text/plain") && c instanceof String) {
543 String inputText = (String) c;
546 if (inputText.startsWith(MARKDOWN_HEADER)) {
547 xhtml = readXhtmlTextMarkdown(inputText.substring(MARKDOWN_HEADER.length()));
// NOTE(review): the arguments of the call below are missing from this view.
550 xhtml = readXhtmlText(
558 return makeFragmentXHTML(xhtml);
560 throw new StorageBackendException("Only text/plain messages are supported for now – post it as plain text please.");
562 } catch (Exception e) {
563 throw new StorageBackendException(e);
567 public String getBodyPlainText() throws StorageBackendException {
569 * TODO: netransformovat XHTML 2x
571 return formatedToPlainText(makeSimpleXHTML(getBodyXhtmlFragment()));