# HG changeset patch # User František Kučera # Date 1318797406 -7200 # Node ID 21f41354135721e7aa001fcaddb9fec14eae2908 # Parent b51ab80c7a9d276cf40bac071ef0c59981c9d4f1 Drupal: tidy, odstavce, XSLT diff -r b51ab80c7a9d -r 21f413541357 src/org/sonews/storage/DrupalMessage.java --- a/src/org/sonews/storage/DrupalMessage.java Sun Oct 16 20:55:46 2011 +0200 +++ b/src/org/sonews/storage/DrupalMessage.java Sun Oct 16 22:36:46 2011 +0200 @@ -91,57 +91,67 @@ Multipart multipart = new MimeMultipart("alternative"); setContent(multipart); + /** XHTML part */ + MimeBodyPart htmlPart = new MimeBodyPart(); + multipart.addBodyPart(htmlPart); + String xhtmlText = readXhtmlText(rs); + htmlPart.setContent(xhtmlText, XHTML_CONTENT_TYPE); + /** Plain text part */ MimeBodyPart textPart = new MimeBodyPart(); multipart.addBodyPart(textPart); - textPart.setText(readPlainText(rs)); - - /** XHTML part */ - MimeBodyPart htmlPart = new MimeBodyPart(); - multipart.addBodyPart(htmlPart); - htmlPart.setContent(readXhtmlText(rs), XHTML_CONTENT_TYPE); + textPart.setText(readPlainText(rs, xhtmlText)); } else { + /** empty body, just headers */ setText(""); } } - private String readPlainText(ResultSet rs) { + private String readPlainText(ResultSet rs, String xhtmlText) { /** * TODO: převést na prostý text */ - return "TODO: obyčejný text"; + return "TODO: obyčejný text\n\n\n" + xhtmlText; } private String readXhtmlText(ResultSet rs) { /** - * TODO: znovupoužívat XSL transformér + * TODO: + * - znovupoužívat XSL transformér + * - používat cache, ukládat si vygenerované články */ try { - String originalText = rs.getString("text"); + String inputText = "" + rs.getString("text") + ""; - /** - * TODO: používat cache, ukládat si vygenerované články - * - * - * Místo markdownu jen ošetřit: - * - odstavce - * - nesmyslné entity v odkazech - * - neuzavřené značky: br, hr, img - */ - String tidyTexy = tidyXhtml("" + originalText + ""); + TransformerFactory tf = TransformerFactory.newInstance(); + Transformer paragraphTransformer = tf.newTransformer(new StreamSource(Resource.getAsStream("helpers/mimeXhtmlPart-make-paragraphs.xsl"))); + String paragraphedText; + boolean tidyWasUsed = false; + try { + StringReader input = new StringReader(inputText); + StringWriter output = new StringWriter(2 * inputText.length()); + paragraphTransformer.transform(new StreamSource(input), new StreamResult(output)); + paragraphedText = output.toString(); + } catch (Exception e) { + log.log(Level.FINER, "HTML input was shitty – Tidy had to be called.", e); + StringReader input = new StringReader(tidyXhtml(inputText)); + StringWriter output = new StringWriter(2 * inputText.length()); + paragraphTransformer.transform(new StreamSource(input), new StreamResult(output)); + paragraphedText = output.toString(); + tidyWasUsed = true; + } - - StringReader input = new StringReader(tidyTexy); - StringWriter output = new StringWriter(2 * tidyTexy.length()); - TransformerFactory tf = TransformerFactory.newInstance(); - Transformer t = tf.newTransformer(new StreamSource(Resource.getAsStream("helpers/mimeXhtmlPart.xsl"))); - t.setParameter("isRoot", (rs.getInt("parent_id") == 0)); - t.setParameter("title", rs.getString("subject")); - t.setParameter("urlBase", rs.getString("urlBase")); - t.setParameter("wwwRead", rs.getString("wwwRead")); - t.setParameter("wwwPost", rs.getString("wwwPost")); - t.transform(new StreamSource(input), new StreamResult(output)); + Transformer xhtmlTransformer = tf.newTransformer(new StreamSource(Resource.getAsStream("helpers/mimeXhtmlPart.xsl"))); + xhtmlTransformer.setParameter("isRoot", (rs.getInt("parent_id") == 0)); + xhtmlTransformer.setParameter("title", rs.getString("subject")); + xhtmlTransformer.setParameter("urlBase", rs.getString("urlBase")); + xhtmlTransformer.setParameter("wwwRead", rs.getString("wwwRead")); + xhtmlTransformer.setParameter("wwwPost", rs.getString("wwwPost")); + xhtmlTransformer.setParameter("headComment", String.format("Drupal-NNTP bridge. Transformed: %1$tc. Tidy had to be used: %2$b", new Date(), tidyWasUsed)); + StringReader input = new StringReader(paragraphedText); + StringWriter output = new StringWriter(2 * paragraphedText.length()); + xhtmlTransformer.transform(new StreamSource(input), new StreamResult(output)); return output.toString(); } catch (Exception e) { @@ -157,15 +167,21 @@ * TODO: refaktorovat, přesunout */ private static String tidyXhtml(String inputText) throws IOException { + // https://sourceforge.net/tracker/index.php?func=detail&aid=3424437&group_id=27659&atid=390966 + inputText = inputText.replaceAll("\\n", "◆\n"); + Runtime r = Runtime.getRuntime(); - Process p = r.exec(new String[]{"tidy", - "-asxml", - "-numeric", - "-utf8", - "-quiet", - "--doctype", "omit", - "--logical-emphasis", "true", - "--show-errors", "0"}); + Process p = r.exec(new String[]{"tidy", // http://tidy.sourceforge.net + "-asxml", // well formed XHTML + "-numeric", // číselné entity + "-utf8", // kódování + "--show-warnings", "false", // žádná varování nás nezajímají + "--show-errors", "0", // ani chyby + "--doctype", "omit", // doctype nepotřebujeme (doplníme si případně vlastní v XSLT) + "--logical-emphasis", "true", // em a strong místo i a b + "--literal-attributes", "true", // zachovat mezery a konce řádků v atributech + "--force-output", "true" // neznámé značky zahodíme, vložíme jen jejich obsah + }); PrintStream vstupProcesu = new PrintStream(p.getOutputStream()); vstupProcesu.print(inputText); @@ -173,6 +189,9 @@ String outputText = streamToString(p.getInputStream()); + outputText = outputText.replaceAll("◆\\n", "\n"); + outputText = outputText.replaceAll("◆", "\n"); + return outputText; }