# HG changeset patch
# User František Kučera
# Date 1318468162 -7200
# Node ID 41d6c0cac8b34dcb79b5f6158127b1876d94fb0d
# Parent  e1244384cc6f14a71de0e6f2a57a62ea79490118
Drupal: číštění HTML – Tidy.

diff -r e1244384cc6f -r 41d6c0cac8b3 src/org/sonews/storage/DrupalMessage.java
--- a/src/org/sonews/storage/DrupalMessage.java	Wed Oct 12 23:10:31 2011 +0200
+++ b/src/org/sonews/storage/DrupalMessage.java	Thu Oct 13 03:09:22 2011 +0200
@@ -17,8 +17,12 @@
  */
 package org.sonews.storage;
 
+import java.io.BufferedReader;
 import java.io.ByteArrayOutputStream;
 import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.io.PrintStream;
 import java.io.StringReader;
 import java.io.StringWriter;
 import java.io.UnsupportedEncodingException;
@@ -114,8 +118,22 @@
 		 */
 		try {
 			String originalText = rs.getString("text");
-			StringReader input = new StringReader("" + originalText + "");
-			StringWriter output = new StringWriter();
+
+			/**
+			 * TODO: use a cache, store the generated articles
+			 * 
+			 * The text is first passed through tidyXhtml(); its output is what the XSLT below transforms.
+			 * Instead of markdown, only handle:
+			 *		- paragraphs
+			 *		- nonsensical entities in links
+			 *		- unclosed tags: br, hr, img
+			 */
+			String tidyTexy = tidyXhtml("" + originalText + "");
+
+
+
+			StringReader input = new StringReader(tidyTexy);
+			StringWriter output = new StringWriter(2 * tidyTexy.length()); // initial buffer size: twice the length of the tidied text
 			TransformerFactory tf = TransformerFactory.newInstance();
 			Transformer t = tf.newTransformer(new StreamSource(Resource.getAsStream("helpers/mimeXhtmlPart.xsl")));
 			t.setParameter("isRoot", (rs.getInt("parent_id") == 0));
@@ -124,7 +142,7 @@
 			t.setParameter("wwwRead", rs.getString("wwwRead"));
 			t.setParameter("wwwPost", rs.getString("wwwPost"));
 			t.transform(new StreamSource(input), new StreamResult(output));
-			
+
 			return output.toString();
 		} catch (Exception e) {
 			/**
@@ -135,6 +153,47 @@
 		}
 	}
 
+	/**
+	 * TODO: refactor, move elsewhere. Launches the external "tidy" program with the options below, writes inputText to its standard input and returns everything it prints on standard output.
+	 */
+	private static String tidyXhtml(String inputText) throws IOException {
+		Runtime r = Runtime.getRuntime();
+		Process p = r.exec(new String[]{"tidy", // argument array: each element is passed to the process as one argument
+					"-asxml",
+					"-numeric",
+					"-utf8", // NOTE(review): tidy is told UTF-8, but the streams below use the platform default charset
+					"-quiet",
+					"--doctype", "omit",
+					"--logical-emphasis", "true",
+					"--show-errors", "0"});
+
+		PrintStream vstupProcesu = new PrintStream(p.getOutputStream()); // "vstupProcesu" = process input; no charset is specified here
+		vstupProcesu.print(inputText);
+		vstupProcesu.close(); // closes the pipe to the process's standard input
+
+		String outputText = streamToString(p.getInputStream()); // NOTE(review): output is read only after all input was written; stderr and the exit status are never read — may block on large texts, confirm
+
+		return outputText;
+	}
+
+	/**
+	 * TODO: refactor, move elsewhere. Reads the given stream line by line until readLine() returns null and returns the lines, each followed by "\n", as one string.
+	 */
+	private static String streamToString(InputStream proud) throws IOException { // "proud" = stream
+		StringBuilder výsledek = new StringBuilder(); // "výsledek" = result
+		BufferedReader buf = new BufferedReader(new InputStreamReader(proud)); // NOTE(review): no explicit charset; the reader is not closed in this method
+		while (true) {
+			String radek = buf.readLine(); // "radek" = line; null means end of stream
+			if (radek == null) {
+				break;
+			} else {
+				výsledek.append(radek);
+				výsledek.append("\n"); // a newline is appended after every line, including the last one
+			}
+		}
+		return výsledek.toString();
+	}
+
 	private static String constructMessageId(int articleID, int groupID, String groupName, String domainName) {
 		StringBuilder sb = new StringBuilder();
 		sb.append("<");