# HG changeset patch # User František Kučera # Date 1319211329 -7200 # Node ID b4c8a2760d6f010796ae8bdc510d378dbeb65602 # Parent a788bf0e10801af0a670d359e5e15337cc3503d5 Drupal: ignorování XML komentářů (nebudou dělit odstavce). diff -r a788bf0e1080 -r b4c8a2760d6f src/org/sonews/storage/DrupalArticle.java --- a/src/org/sonews/storage/DrupalArticle.java Thu Oct 20 10:50:58 2011 +0200 +++ b/src/org/sonews/storage/DrupalArticle.java Fri Oct 21 17:35:29 2011 +0200 @@ -24,7 +24,7 @@ import javax.mail.internet.InternetHeaders; /** - * + * V Article je IMHO chyba, protože se hlavičky z msg zapíší dvakrát. * @author František Kučera (frantovo.cz) */ public class DrupalArticle extends Article { diff -r a788bf0e1080 -r b4c8a2760d6f src/org/sonews/storage/DrupalMessage.java --- a/src/org/sonews/storage/DrupalMessage.java Thu Oct 20 10:50:58 2011 +0200 +++ b/src/org/sonews/storage/DrupalMessage.java Fri Oct 21 17:35:29 2011 +0200 @@ -42,13 +42,19 @@ import javax.mail.internet.MimeBodyPart; import javax.mail.internet.MimeMessage; import javax.mail.internet.MimeMultipart; +import javax.xml.parsers.DocumentBuilder; +import javax.xml.parsers.DocumentBuilderFactory; +import javax.xml.parsers.ParserConfigurationException; import javax.xml.transform.Transformer; import javax.xml.transform.TransformerException; import javax.xml.transform.TransformerFactory; +import javax.xml.transform.dom.DOMSource; import javax.xml.transform.stream.StreamResult; import javax.xml.transform.stream.StreamSource; import org.sonews.daemon.NNTPConnection; import org.sonews.util.io.Resource; +import org.w3c.dom.Document; +import org.xml.sax.SAXException; /** * This is MimeMessage which enables custom Message-ID header @@ -71,14 +77,32 @@ private String messageID; private Long parentID; private Long groupID; + private TransformerFactory transformerFactory; + private DocumentBuilderFactory documentBuilderFactory; + + /** + * Initializes XML factories (Transformer, DocumentBuilder). 
+ */ + private void initFactories() { + transformerFactory = TransformerFactory.newInstance(); + documentBuilderFactory = DocumentBuilderFactory.newInstance(); + /** + * Komentáře nepotřebujeme + * (a museli bychom je brát v úvahu při dělení odstavců: + * v současné verzi XSLT odstavcovače by nám případný komentář + * rozdělil text na dva odstavce, přestože to má být odstavec jeden). + */ + documentBuilderFactory.setIgnoringComments(true); + } /** * Constructs MIME message from SQL result. * @param rs ResultSet containing message data. No {@link ResultSet#next()} will be called, just values from current row will be read. * @param constructBody true if whole message should be constructed | false if we need only message headers (body will be dummy). */ - public DrupalMessage(ResultSet rs, String myDomain, boolean constructBody) throws SQLException, UnsupportedEncodingException, MessagingException, TransformerException, IOException { + public DrupalMessage(ResultSet rs, String myDomain, boolean constructBody) throws SQLException, UnsupportedEncodingException, MessagingException, TransformerException, IOException, ParserConfigurationException, SAXException { super(Session.getDefaultInstance(System.getProperties())); + initFactories(); groupID = rs.getLong("group_id"); addHeader("Message-id", constructMessageId(rs.getInt("id"), groupID, rs.getString("group_name"), myDomain)); @@ -135,9 +159,10 @@ */ public DrupalMessage(Article article) throws MessagingException { super(Session.getDefaultInstance(System.getProperties()), serializeArticle(article)); + initFactories(); String[] parentHeaders = getHeader("In-Reply-To"); - if (parentHeaders.length == 1) { + if (parentHeaders != null && parentHeaders.length == 1) { String parentMessageID = parentHeaders[0]; parentID = parseArticleID(parentMessageID); groupID = parseGroupID(parentMessageID); @@ -162,8 +187,7 @@ private String readPlainText(ResultSet rs, String xhtmlText) { try { - TransformerFactory tf =
TransformerFactory.newInstance(); - Transformer textTransformer = tf.newTransformer(new StreamSource(Resource.getAsStream("helpers/mimeTextPart.xsl"))); + Transformer textTransformer = transformerFactory.newTransformer(new StreamSource(Resource.getAsStream("helpers/mimeTextPart.xsl"))); StringReader input = new StringReader(xhtmlText); StringWriter output = new StringWriter(xhtmlText.length()); @@ -179,34 +203,37 @@ } } - private String readXhtmlText(String text, String subject, long parentId, String urlBase, String wwwRead, String wwwPost) throws TransformerException, IOException { + private DOMSource readDOM(String xml) throws ParserConfigurationException, SAXException, IOException { + DocumentBuilder db = documentBuilderFactory.newDocumentBuilder(); + Document d = db.parse(new ByteArrayInputStream(xml.getBytes("UTF-8"))); + return new DOMSource(d); + } + + private String readXhtmlText(String text, String subject, long parentId, String urlBase, String wwwRead, String wwwPost) throws TransformerException, IOException, ParserConfigurationException, SAXException { /** * TODO: - * - znovupoužívat XSL transformér + * - znovupoužívat XSL transformér (nejen v instanci) * - používat cache, ukládat si vygenerované články */ String inputText = makeSimpleXHTML(text); - TransformerFactory tf = TransformerFactory.newInstance(); - Transformer paragraphTransformer = tf.newTransformer(new StreamSource(Resource.getAsStream("helpers/mimeXhtmlPart-make-paragraphs.xsl"))); + Transformer paragraphTransformer = transformerFactory.newTransformer(new StreamSource(Resource.getAsStream("helpers/mimeXhtmlPart-make-paragraphs.xsl"))); String paragraphedText; boolean tidyWasUsed = false; try { - StringReader input = new StringReader(inputText); StringWriter output = new StringWriter(2 * inputText.length()); - paragraphTransformer.transform(new StreamSource(input), new StreamResult(output)); + paragraphTransformer.transform(readDOM(inputText), new StreamResult(output)); paragraphedText = 
output.toString(); } catch (Exception e) { log.log(Level.FINER, "HTML input was shitty – Tidy had to be called.", e); - StringReader input = new StringReader(tidyXhtml(inputText)); StringWriter output = new StringWriter(2 * inputText.length()); - paragraphTransformer.transform(new StreamSource(input), new StreamResult(output)); + paragraphTransformer.transform(readDOM(tidyXhtml(inputText)), new StreamResult(output)); paragraphedText = output.toString(); tidyWasUsed = true; } - Transformer xhtmlTransformer = tf.newTransformer(new StreamSource(Resource.getAsStream("helpers/mimeXhtmlPart.xsl"))); + Transformer xhtmlTransformer = transformerFactory.newTransformer(new StreamSource(Resource.getAsStream("helpers/mimeXhtmlPart.xsl"))); xhtmlTransformer.setParameter("isRoot", (parentId == 0)); xhtmlTransformer.setParameter("title", subject); xhtmlTransformer.setParameter("urlBase", urlBase);