1.1 --- a/src/org/sonews/storage/DrupalMessage.java Thu Oct 13 03:09:22 2011 +0200
1.2 +++ b/src/org/sonews/storage/DrupalMessage.java Sun Oct 16 23:41:04 2011 +0200
1.3 @@ -91,57 +91,67 @@
1.4 Multipart multipart = new MimeMultipart("alternative");
1.5 setContent(multipart);
1.6
1.7 + /** XHTML part */
1.8 + MimeBodyPart htmlPart = new MimeBodyPart();
1.9 + multipart.addBodyPart(htmlPart);
1.10 + String xhtmlText = readXhtmlText(rs);
1.11 + htmlPart.setContent(xhtmlText, XHTML_CONTENT_TYPE);
1.12 +
1.13 /** Plain text part */
1.14 MimeBodyPart textPart = new MimeBodyPart();
1.15 multipart.addBodyPart(textPart);
1.16 - textPart.setText(readPlainText(rs));
1.17 -
1.18 - /** XHTML part */
1.19 - MimeBodyPart htmlPart = new MimeBodyPart();
1.20 - multipart.addBodyPart(htmlPart);
1.21 - htmlPart.setContent(readXhtmlText(rs), XHTML_CONTENT_TYPE);
1.22 + textPart.setText(readPlainText(rs, xhtmlText));
1.23 } else {
1.24 + /** empty body, just headers */
1.25 setText("");
1.26 }
1.27 }
1.28
1.29 - private String readPlainText(ResultSet rs) {
1.30 + private String readPlainText(ResultSet rs, String xhtmlText) {
1.31 /**
1.32 * TODO: převést na prostý text
1.33 */
1.34 - return "TODO: obyčejný text";
1.35 + return "TODO: obyčejný text\n\n\n" + xhtmlText;
1.36 }
1.37
1.38 private String readXhtmlText(ResultSet rs) {
1.39 /**
1.40 - * TODO: znovupoužívat XSL transformér
1.41 + * TODO:
1.42 + * - znovupoužívat XSL transformér
1.43 + * - používat cache, ukládat si vygenerované články
1.44 */
1.45 try {
1.46 - String originalText = rs.getString("text");
1.47 + String inputText = "<html><body>" + rs.getString("text") + "</body></html>";
1.48
1.49 - /**
1.50 - * TODO: používat cache, ukládat si vygenerované články
1.51 - *
1.52 - *
1.53 - * Místo markdownu jen ošetřit:
1.54 - * - odstavce
1.55 - * - nesmyslné entity v odkazech
1.56 - * - neuzavřené značky: br, hr, img
1.57 - */
1.58 - String tidyTexy = tidyXhtml("<html><body>" + originalText + "</body></html>");
1.59 + TransformerFactory tf = TransformerFactory.newInstance();
1.60 + Transformer paragraphTransformer = tf.newTransformer(new StreamSource(Resource.getAsStream("helpers/mimeXhtmlPart-make-paragraphs.xsl")));
1.61
1.62 + String paragraphedText;
1.63 + boolean tidyWasUsed = false;
1.64 + try {
1.65 + StringReader input = new StringReader(inputText);
1.66 + StringWriter output = new StringWriter(2 * inputText.length());
1.67 + paragraphTransformer.transform(new StreamSource(input), new StreamResult(output));
1.68 + paragraphedText = output.toString();
1.69 + } catch (Exception e) {
1.70 + log.log(Level.FINER, "HTML input was shitty – Tidy had to be called.", e);
1.71 + StringReader input = new StringReader(tidyXhtml(inputText));
1.72 + StringWriter output = new StringWriter(2 * inputText.length());
1.73 + paragraphTransformer.transform(new StreamSource(input), new StreamResult(output));
1.74 + paragraphedText = output.toString();
1.75 + tidyWasUsed = true;
1.76 + }
1.77
1.78 -
1.79 - StringReader input = new StringReader(tidyTexy);
1.80 - StringWriter output = new StringWriter(2 * tidyTexy.length());
1.81 - TransformerFactory tf = TransformerFactory.newInstance();
1.82 - Transformer t = tf.newTransformer(new StreamSource(Resource.getAsStream("helpers/mimeXhtmlPart.xsl")));
1.83 - t.setParameter("isRoot", (rs.getInt("parent_id") == 0));
1.84 - t.setParameter("title", rs.getString("subject"));
1.85 - t.setParameter("urlBase", rs.getString("urlBase"));
1.86 - t.setParameter("wwwRead", rs.getString("wwwRead"));
1.87 - t.setParameter("wwwPost", rs.getString("wwwPost"));
1.88 - t.transform(new StreamSource(input), new StreamResult(output));
1.89 + Transformer xhtmlTransformer = tf.newTransformer(new StreamSource(Resource.getAsStream("helpers/mimeXhtmlPart.xsl")));
1.90 + xhtmlTransformer.setParameter("isRoot", (rs.getInt("parent_id") == 0));
1.91 + xhtmlTransformer.setParameter("title", rs.getString("subject"));
1.92 + xhtmlTransformer.setParameter("urlBase", rs.getString("urlBase"));
1.93 + xhtmlTransformer.setParameter("wwwRead", rs.getString("wwwRead"));
1.94 + xhtmlTransformer.setParameter("wwwPost", rs.getString("wwwPost"));
1.95 + xhtmlTransformer.setParameter("headComment", String.format("Drupal-NNTP bridge. Transformed: %1$tc. Tidy had to be used: %2$b", new Date(), tidyWasUsed));
1.96 + StringReader input = new StringReader(paragraphedText);
1.97 + StringWriter output = new StringWriter(2 * paragraphedText.length());
1.98 + xhtmlTransformer.transform(new StreamSource(input), new StreamResult(output));
1.99
1.100 return output.toString();
1.101 } catch (Exception e) {
1.102 @@ -157,15 +167,21 @@
1.103 * TODO: refaktorovat, přesunout
1.104 */
1.105 private static String tidyXhtml(String inputText) throws IOException {
1.106 + // https://sourceforge.net/tracker/index.php?func=detail&aid=3424437&group_id=27659&atid=390966
1.107 + inputText = inputText.replaceAll("\\n", "◆\n");
1.108 +
1.109 Runtime r = Runtime.getRuntime();
1.110 - Process p = r.exec(new String[]{"tidy",
1.111 - "-asxml",
1.112 - "-numeric",
1.113 - "-utf8",
1.114 - "-quiet",
1.115 - "--doctype", "omit",
1.116 - "--logical-emphasis", "true",
1.117 - "--show-errors", "0"});
1.118 + Process p = r.exec(new String[]{"tidy", // http://tidy.sourceforge.net
1.119 + "-asxml", // well formed XHTML
1.120 + "-numeric", // číselné entity
1.121 + "-utf8", // kódování
1.122 + "--show-warnings", "false", // žádná varování nás nezajímají
1.123 + "--show-errors", "0", // ani chyby
1.124 + "--doctype", "omit", // doctype nepotřebujeme (doplníme si případně vlastní v XSLT)
1.125 + "--logical-emphasis", "true", // em a strong místo i a b
1.126 + "--literal-attributes", "true", // zachovat mezery a konce řádků v atributech
1.127 + "--force-output", "true" // neznámé značky zahodíme, vložíme jen jejich obsah
1.128 + });
1.129
1.130 PrintStream vstupProcesu = new PrintStream(p.getOutputStream());
1.131 vstupProcesu.print(inputText);
1.132 @@ -173,6 +189,9 @@
1.133
1.134 String outputText = streamToString(p.getInputStream());
1.135
1.136 + outputText = outputText.replaceAll("◆\\n", "\n");
1.137 + outputText = outputText.replaceAll("◆", "\n");
1.138 +
1.139 return outputText;
1.140 }
1.141