Drupal: ignorování XML komentářů (nebudou dělit odstavce).
1.1 --- a/src/org/sonews/storage/DrupalArticle.java Thu Oct 20 10:50:58 2011 +0200
1.2 +++ b/src/org/sonews/storage/DrupalArticle.java Fri Oct 21 17:35:29 2011 +0200
1.3 @@ -24,7 +24,7 @@
1.4 import javax.mail.internet.InternetHeaders;
1.5
1.6 /**
1.7 - *
1.8 + * V Article je IMHO chyba, protože se hlavičky z msg zapíší dvakrát.
1.9 * @author František Kučera (frantovo.cz)
1.10 */
1.11 public class DrupalArticle extends Article {
2.1 --- a/src/org/sonews/storage/DrupalMessage.java Thu Oct 20 10:50:58 2011 +0200
2.2 +++ b/src/org/sonews/storage/DrupalMessage.java Fri Oct 21 17:35:29 2011 +0200
2.3 @@ -42,13 +42,19 @@
2.4 import javax.mail.internet.MimeBodyPart;
2.5 import javax.mail.internet.MimeMessage;
2.6 import javax.mail.internet.MimeMultipart;
2.7 +import javax.xml.parsers.DocumentBuilder;
2.8 +import javax.xml.parsers.DocumentBuilderFactory;
2.9 +import javax.xml.parsers.ParserConfigurationException;
2.10 import javax.xml.transform.Transformer;
2.11 import javax.xml.transform.TransformerException;
2.12 import javax.xml.transform.TransformerFactory;
2.13 +import javax.xml.transform.dom.DOMSource;
2.14 import javax.xml.transform.stream.StreamResult;
2.15 import javax.xml.transform.stream.StreamSource;
2.16 import org.sonews.daemon.NNTPConnection;
2.17 import org.sonews.util.io.Resource;
2.18 +import org.w3c.dom.Document;
2.19 +import org.xml.sax.SAXException;
2.20
2.21 /**
2.22 * This is MimeMessage which enables custom Message-ID header
2.23 @@ -71,14 +77,32 @@
2.24 private String messageID;
2.25 private Long parentID;
2.26 private Long groupID;
2.27 + private TransformerFactory transformerFactory;
2.28 + private DocumentBuilderFactory documentBuilderFactory;
2.29 +
2.30 + /**
2.31 + * Initializes XML factories (Transformer, DocumentBuilder).
2.32 + */
2.33 + private void initFactories() {
2.34 + transformerFactory = TransformerFactory.newInstance();
2.35 + documentBuilderFactory = DocumentBuilderFactory.newInstance();
2.36 + /**
2.37 + * Komentáře nepotřebujeme
2.38 + * (a museli bychom je brát v úvahu při dělení odstavců:
2.39 + * v současné verzi XSLT odstavcovače by nám případný komentář
2.40 + * rozdělil text na dva odstavce, přestože to má být odstavec jeden).
2.41 + */
2.42 + documentBuilderFactory.setIgnoringComments(true);
2.43 + }
2.44
2.45 /**
2.46 * Constructs MIME message from SQL result.
2.47 * @param rs ResultSet containing message data. No {@link ResultSet#next()} will be called, just values from current row will be read.
2.48 * @param constructBody true if whole message should be constructed | false if we need only message headers (body will be dummy).
2.49 */
2.50 - public DrupalMessage(ResultSet rs, String myDomain, boolean constructBody) throws SQLException, UnsupportedEncodingException, MessagingException, TransformerException, IOException {
2.51 + public DrupalMessage(ResultSet rs, String myDomain, boolean constructBody) throws SQLException, UnsupportedEncodingException, MessagingException, TransformerException, IOException, ParserConfigurationException, SAXException {
2.52 super(Session.getDefaultInstance(System.getProperties()));
2.53 + initFactories();
2.54
2.55 groupID = rs.getLong("group_id");
2.56 addHeader("Message-id", constructMessageId(rs.getInt("id"), groupID, rs.getString("group_name"), myDomain));
2.57 @@ -135,9 +159,10 @@
2.58 */
2.59 public DrupalMessage(Article article) throws MessagingException {
2.60 super(Session.getDefaultInstance(System.getProperties()), serializeArticle(article));
2.61 + initFactories();
2.62
2.63 String[] parentHeaders = getHeader("In-Reply-To");
2.64 - if (parentHeaders.length == 1) {
2.65 + if (parentHeaders != null && parentHeaders.length == 1) {
2.66 String parentMessageID = parentHeaders[0];
2.67 parentID = parseArticleID(parentMessageID);
2.68 groupID = parseGroupID(parentMessageID);
2.69 @@ -162,8 +187,7 @@
2.70
2.71 private String readPlainText(ResultSet rs, String xhtmlText) {
2.72 try {
2.73 - TransformerFactory tf = TransformerFactory.newInstance();
2.74 - Transformer textTransformer = tf.newTransformer(new StreamSource(Resource.getAsStream("helpers/mimeTextPart.xsl")));
2.75 + Transformer textTransformer = transformerFactory.newTransformer(new StreamSource(Resource.getAsStream("helpers/mimeTextPart.xsl")));
2.76
2.77 StringReader input = new StringReader(xhtmlText);
2.78 StringWriter output = new StringWriter(xhtmlText.length());
2.79 @@ -179,34 +203,37 @@
2.80 }
2.81 }
2.82
2.83 - private String readXhtmlText(String text, String subject, long parentId, String urlBase, String wwwRead, String wwwPost) throws TransformerException, IOException {
2.84 + private DOMSource readDOM(String xml) throws ParserConfigurationException, SAXException, IOException {
2.85 + DocumentBuilder db = documentBuilderFactory.newDocumentBuilder();
2.86 + Document d = db.parse(new ByteArrayInputStream(xml.getBytes("UTF-8")));
2.87 + return new DOMSource(d);
2.88 + }
2.89 +
2.90 + private String readXhtmlText(String text, String subject, long parentId, String urlBase, String wwwRead, String wwwPost) throws TransformerException, IOException, ParserConfigurationException, SAXException {
2.91 /**
2.92 * TODO:
2.93 - * - znovupoužívat XSL transformér
2.94 + * - znovupoužívat XSL transformér (nejen v instanci)
2.95 * - používat cache, ukládat si vygenerované články
2.96 */
2.97 String inputText = makeSimpleXHTML(text);
2.98
2.99 - TransformerFactory tf = TransformerFactory.newInstance();
2.100 - Transformer paragraphTransformer = tf.newTransformer(new StreamSource(Resource.getAsStream("helpers/mimeXhtmlPart-make-paragraphs.xsl")));
2.101 + Transformer paragraphTransformer = transformerFactory.newTransformer(new StreamSource(Resource.getAsStream("helpers/mimeXhtmlPart-make-paragraphs.xsl")));
2.102
2.103 String paragraphedText;
2.104 boolean tidyWasUsed = false;
2.105 try {
2.106 - StringReader input = new StringReader(inputText);
2.107 StringWriter output = new StringWriter(2 * inputText.length());
2.108 - paragraphTransformer.transform(new StreamSource(input), new StreamResult(output));
2.109 + paragraphTransformer.transform(readDOM(inputText), new StreamResult(output));
2.110 paragraphedText = output.toString();
2.111 } catch (Exception e) {
2.112 log.log(Level.FINER, "HTML input was shitty – Tidy had to be called.", e);
2.113 - StringReader input = new StringReader(tidyXhtml(inputText));
2.114 StringWriter output = new StringWriter(2 * inputText.length());
2.115 - paragraphTransformer.transform(new StreamSource(input), new StreamResult(output));
2.116 + paragraphTransformer.transform(readDOM(tidyXhtml(inputText)), new StreamResult(output));
2.117 paragraphedText = output.toString();
2.118 tidyWasUsed = true;
2.119 }
2.120
2.121 - Transformer xhtmlTransformer = tf.newTransformer(new StreamSource(Resource.getAsStream("helpers/mimeXhtmlPart.xsl")));
2.122 + Transformer xhtmlTransformer = transformerFactory.newTransformer(new StreamSource(Resource.getAsStream("helpers/mimeXhtmlPart.xsl")));
2.123 xhtmlTransformer.setParameter("isRoot", (parentId == 0));
2.124 xhtmlTransformer.setParameter("title", subject);
2.125 xhtmlTransformer.setParameter("urlBase", urlBase);