# HG changeset patch
# User František Kučera <franta-hg@frantovo.cz>
# Date 1318868074 -7200
# Node ID c60625d581582e672c66e654d07195da256b94b9
# Parent  cbf1a1153313a4297187ae2ff58bba6af02449d8
Drupal: textová část zpráv (text/plain), základní funkční verze (XSLT+Java).

diff -r cbf1a1153313 -r c60625d58158 helpers/mimeTextPart.xsl
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/helpers/mimeTextPart.xsl	Mon Oct 17 18:14:34 2011 +0200
@@ -0,0 +1,207 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!-- Renders an XHTML article as Markdown-like plain text (used for the text/plain part of a message). -->
+<xsl:stylesheet version="2.0"
+	xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
+	xmlns:fn="http://www.w3.org/2005/xpath-functions"
+	xmlns:h="http://www.w3.org/1999/xhtml">
+	<xsl:output method="text" encoding="UTF-8"/>
+	<xsl:strip-space elements="*"/>
+
+	<!-- Base URL read from the base element in the document head; relative link targets are appended to it. -->
+	<xsl:variable name="urlBase" select="/h:html/h:head/h:base/@href"/>
+
+	<!-- Whole document: only the body is rendered. -->
+	<xsl:template match="/">
+		<xsl:apply-templates select="h:html/h:body"/>
+	</xsl:template>
+
+	<!-- h1: heading text on one line, underlined with '#' characters on the next. -->
+	<xsl:template match="h:h1">
+		<xsl:value-of select="text()"/>
+		<xsl:text>&#10;</xsl:text>
+		<xsl:for-each select="1 to string-length(.)">#</xsl:for-each>
+		<xsl:text>&#10;</xsl:text>
+		<xsl:text>&#10;</xsl:text>
+	</xsl:template>
+
+	<!-- h2: heading text on one line, underlined with '-' characters on the next. -->
+	<xsl:template match="h:h2">
+		<xsl:value-of select="text()"/>
+		<xsl:text>&#10;</xsl:text>
+		<xsl:for-each select="1 to string-length(.)">-</xsl:for-each>
+		<xsl:text>&#10;</xsl:text>
+		<xsl:text>&#10;</xsl:text>
+	</xsl:template>
+
+	<!-- h3 to h6: heading text prefixed with as many '#' characters as the heading level. -->
+	<xsl:template match="h:h3">
+		<xsl:for-each select="1 to 3">#</xsl:for-each>
+		<xsl:text> </xsl:text>
+		<xsl:value-of select="text()"/>
+		<xsl:text>&#10;</xsl:text>
+		<xsl:text>&#10;</xsl:text>
+	</xsl:template>
+
+
+	<xsl:template match="h:h4">
+		<xsl:for-each select="1 to 4">#</xsl:for-each>
+		<xsl:text> </xsl:text>
+		<xsl:value-of select="text()"/>
+		<xsl:text>&#10;</xsl:text>
+		<xsl:text>&#10;</xsl:text>
+	</xsl:template>
+
+
+	<xsl:template match="h:h5">
+		<xsl:for-each select="1 to 5">#</xsl:for-each>
+		<xsl:text> </xsl:text>
+		<xsl:value-of select="text()"/>
+		<xsl:text>&#10;</xsl:text>
+		<xsl:text>&#10;</xsl:text>
+	</xsl:template>
+
+	<xsl:template match="h:h6">
+		<xsl:for-each select="1 to 6">#</xsl:for-each>
+		<xsl:text> </xsl:text>
+		<xsl:value-of select="text()"/>
+		<xsl:text>&#10;</xsl:text>
+		<xsl:text>&#10;</xsl:text>
+	</xsl:template>
+
+
+	<!-- Paragraph: rendered content followed by an empty line. -->
+	<xsl:template match="h:p">
+		<xsl:apply-templates/>
+		<xsl:text>&#10;</xsl:text>
+		<xsl:text>&#10;</xsl:text>
+	</xsl:template>
+
+	<!-- Link: "text" <target> (title); the mailto: prefix is dropped and other non-http/https/ftp hrefs are appended to $urlBase. -->
+	<xsl:template match="h:a">
+		<xsl:text>"</xsl:text>
+		<xsl:value-of select="text()"/>
+		<xsl:text>" &lt;</xsl:text>
+		<xsl:choose>
+			<xsl:when test="matches(@href, '^(http:|https:|ftp:)')">
+				<xsl:value-of select="@href"/>
+			</xsl:when>
+			<xsl:when test="matches(@href, '^mailto:')">
+				<xsl:value-of select="substring-after(@href, 'mailto:')"/>
+			</xsl:when>
+			<xsl:otherwise>
+				<xsl:choose>
+					<xsl:when test="ends-with($urlBase, '/') or starts-with(@href, '/')">
+						<xsl:value-of select="concat($urlBase, @href)"/>
+					</xsl:when>
+					<xsl:otherwise>
+						<xsl:value-of select="concat($urlBase, '/', @href)"/>
+					</xsl:otherwise>
+				</xsl:choose>
+			</xsl:otherwise>
+		</xsl:choose>
+		<xsl:text>&gt;</xsl:text>
+		<xsl:if test="@title and not(matches(@title, '^\s*$'))">
+			<xsl:text> (</xsl:text>
+			<xsl:value-of select="@title"/>
+			<xsl:text>)</xsl:text>
+		</xsl:if>
+	</xsl:template>
+
+	<!-- Image: converted to a temporary link element (src, title, alt text) and rendered by the link template. -->
+	<xsl:template match="h:img">
+		<xsl:variable name="obrázek">
+			<h:a href="{@src}" title="{@title}">Obrázek: <xsl:value-of select="@alt"/></h:a>
+		</xsl:variable>
+		<xsl:apply-templates select="$obrázek/node()"/>
+	</xsl:template>
+
+	<xsl:template match="h:strong|h:b">
+		<xsl:text>**</xsl:text>
+		<xsl:apply-templates/>
+		<xsl:text>**</xsl:text>
+	</xsl:template>
+
+	<xsl:template match="h:em|h:i">
+		<xsl:text>*</xsl:text>
+		<xsl:apply-templates/>
+		<xsl:text>*</xsl:text>
+	</xsl:template>
+
+	<xsl:template match="h:abbr[@title]">
+		<xsl:apply-templates/>
+		<xsl:text> (</xsl:text>
+		<xsl:value-of select="@title"/>
+		<xsl:text>)</xsl:text>
+	</xsl:template>
+
+	<!-- Preformatted block: framed by dashed lines; text directly inside is not whitespace-normalised. -->
+	<xsl:template match="h:pre">
+		<xsl:text>--------------------------------</xsl:text>
+		<xsl:text>&#10;</xsl:text>
+		<xsl:apply-templates/>
+		<xsl:text>&#10;</xsl:text>
+		<xsl:text>--------------------------------</xsl:text>
+		<xsl:text>&#10;</xsl:text>
+		<xsl:text>&#10;</xsl:text>
+	</xsl:template>
+
+	<xsl:template match="h:code">
+		<xsl:text>`</xsl:text>
+		<xsl:apply-templates/>
+		<xsl:text>`</xsl:text>
+	</xsl:template>
+
+	<xsl:template match="h:hr">
+		<xsl:text>----------------------------------------------------------------</xsl:text>
+		<xsl:text>&#10;</xsl:text>
+		<xsl:text>&#10;</xsl:text>
+	</xsl:template>
+
+	<xsl:template match="h:ul">
+		<xsl:apply-templates/>
+		<xsl:text>&#10;</xsl:text>
+	</xsl:template>
+
+	<xsl:template match="h:ul/h:li">
+		<xsl:text> - </xsl:text>
+		<xsl:apply-templates/>
+		<xsl:text>&#10;</xsl:text>
+	</xsl:template>
+
+	<xsl:template match="h:ol">
+		<xsl:for-each select="h:li">
+			<xsl:value-of select="concat(' ', position(), ') ')"/>
+			<xsl:apply-templates/>
+			<xsl:text>&#10;</xsl:text>
+		</xsl:for-each>
+		<xsl:text>&#10;</xsl:text>
+	</xsl:template>
+
+
+	<!--
+	<xsl:template match="h:blockquote[matches(p/text(), '^(\"|„)')]">
+
+	</xsl:template>
+	-->
+
+	<!-- Text outside pre: whitespace is normalised, keeping a single leading/trailing space where the source had one. -->
+	<xsl:template match="text()[not(parent::h:pre)]">
+		<xsl:if test="matches(., '^\s')">
+			<xsl:text> </xsl:text>
+		</xsl:if>
+		<xsl:value-of select="normalize-space(.)"/>
+		<xsl:if test="matches(., '\s$')">
+			<xsl:text> </xsl:text>
+		</xsl:if>
+	</xsl:template>
+
+
+	<!-- Links block: preceded by a line holding two dashes and a space, then the rendered content. -->
+	<xsl:template match="h:div[@class='wwwLinks']">
+		<xsl:text>-- </xsl:text>
+		<xsl:text>&#10;</xsl:text>
+		<xsl:apply-templates/>
+	</xsl:template>
+
+
+</xsl:stylesheet>
diff -r cbf1a1153313 -r c60625d58158 src/org/sonews/storage/DrupalMessage.java
--- a/src/org/sonews/storage/DrupalMessage.java	Mon Oct 17 13:55:28 2011 +0200
+++ b/src/org/sonews/storage/DrupalMessage.java	Mon Oct 17 18:14:34 2011 +0200
@@ -98,7 +98,9 @@
 
 		/** Plain text part */
 		MimeBodyPart textPart = new MimeBodyPart();
-		textPart.setText(readPlainText(rs, xhtmlText));
+		String plainText = readPlainText(rs, xhtmlText);
+		textPart.setText(plainText);
+		//addHeader("Lines", String.valueOf(plainText.split("\n").length));
 
 		/**
 		 * Thunderbirdu záleží, v jakém pořadí části jsou
@@ -114,10 +116,30 @@
 	}
 
+	/**
+	 * Renders the XHTML version of the article as plain text using the
+	 * helpers/mimeTextPart.xsl stylesheet.
+	 *
+	 * @param rs not used by this method
+	 * @param xhtmlText XHTML source of the article
+	 * @return plain text for the text/plain part; a short error notice if the transformation fails
+	 */
 	private String readPlainText(ResultSet rs, String xhtmlText) {
-		/**
-		 * TODO: převést na prostý text
-		 */
-		return "TODO: obyčejný text\n(zatím čtěte XHTML verzi)";
+		try {
+			TransformerFactory tf = TransformerFactory.newInstance();
+			Transformer textTransformer = tf.newTransformer(new StreamSource(Resource.getAsStream("helpers/mimeTextPart.xsl")));
+
+			StringReader input = new StringReader(xhtmlText);
+			StringWriter output = new StringWriter(xhtmlText.length());
+			textTransformer.transform(new StreamSource(input), new StreamResult(output));
+
+			return output.toString();
+		} catch (Exception e) {
+			/**
+			 * The result goes into the text/plain part, so return the bare notice without XHTML markup. TODO: better error handling
+			 */
+			log.log(Level.WARNING, "Error while transforming article to plain text", e);
+			return "Při transformaci příspěvku bohužel došlo k chybě.";
+		}
 	}
 
 	private String readXhtmlText(ResultSet rs) {
@@ -177,7 +199,13 @@
 	 * TODO: refaktorovat, přesunout
 	 */
 	private static String tidyXhtml(String inputText) throws IOException {
-		// https://sourceforge.net/tracker/index.php?func=detail&aid=3424437&group_id=27659&atid=390966
+		/*
+		 * See https://sourceforge.net/tracker/index.php?func=detail&aid=3424437&group_id=27659&atid=390966
+		 *
+		 * TODO:
+		 *	- use a longer placeholder string, not just a single character
+		 *	- insert it only where some text already exists (not between >\s*<)
+		 */
 		inputText = inputText.replaceAll("\\n", "◆\n");
 
 		Runtime r = Runtime.getRuntime();