org/sonews/storage/Article.java
changeset 33 f9bf183447d1
parent 16 5a4a41cfc0a3
     1.1 --- a/org/sonews/storage/Article.java	Thu Aug 20 21:31:03 2009 +0200
     1.2 +++ b/org/sonews/storage/Article.java	Sun Aug 29 17:03:21 2010 +0200
     1.3 @@ -21,8 +21,6 @@
     1.4  import java.io.ByteArrayInputStream;
     1.5  import java.io.ByteArrayOutputStream;
     1.6  import java.io.IOException;
     1.7 -import java.io.InputStream;
     1.8 -import java.nio.charset.Charset;
     1.9  import java.security.MessageDigest;
    1.10  import java.security.NoSuchAlgorithmException;
    1.11  import java.util.UUID;
    1.12 @@ -32,10 +30,8 @@
    1.13  import javax.mail.Header;
    1.14  import javax.mail.Message;
    1.15  import javax.mail.MessagingException;
    1.16 -import javax.mail.Multipart;
    1.17  import javax.mail.internet.InternetHeaders;
    1.18  import org.sonews.config.Config;
    1.19 -import org.sonews.util.Log;
    1.20  
    1.21  /**
    1.22   * Represents a newsgroup article.
    1.23 @@ -97,7 +93,7 @@
    1.24  
    1.25    /**
    1.26     * Creates an Article instance using the data from the javax.mail.Message
    1.27 -   * object.
    1.28 +   * object. This constructor is called by the Mailinglist gateway.
    1.29     * @see javax.mail.Message
    1.30     * @param msg
    1.31     * @throws IOException
    1.32 @@ -113,61 +109,25 @@
    1.33        final Header header = (Header)e.nextElement();
    1.34        this.headers.addHeader(header.getName(), header.getValue());
    1.35      }
    1.36 -    
    1.37 -    // The "content" of the message can be a String if it's a simple text/plain
    1.38 -    // message, a Multipart object or an InputStream if the content is unknown.
    1.39 -    final Object content = msg.getContent();
    1.40 -    if(content instanceof String)
    1.41 -    {
    1.42 -      this.body = ((String)content).getBytes(getBodyCharset());
    1.43 -    }
    1.44 -    else if(content instanceof Multipart) // probably subclass MimeMultipart
    1.45 -    {
    1.44 -      // We are not interested in the different parts of the MultipartMessage,
    1.47 -      // so we simply read in all data which *can* be huge.
    1.48 -      InputStream in = msg.getInputStream();
    1.49 -      this.body = readContent(in);
    1.50 -    }
    1.51 -    else if(content instanceof InputStream)
    1.52 -    {
    1.53 -      // The message format is unknown to the Message class, but we can
    1.54 -      // simply read in the whole message data.
    1.55 -      this.body = readContent((InputStream)content);
    1.56 -    }
    1.57 -    else
    1.58 -    {
    1.59 -      // Unknown content is probably a malformed mail we should skip.
    1.60 -      // On the other hand we produce an inconsistent mail mirror, but no
    1.61 -      // mail system must transport invalid content.
    1.62 -      Log.get().severe("Skipping message due to unknown content. Throwing exception...");
    1.63 -      MessagingException ex = new MessagingException("Unknown content: " + content);
    1.64 -      Log.get().throwing("Article.java", "<init>", ex);
    1.65 -      throw ex;
    1.66 -    }
    1.67 +
    1.68 +	// Reads the raw byte body using Message.writeTo(OutputStream out)
    1.69 +	this.body = readContent(msg);
    1.70      
    1.71      // Validate headers
    1.72      validateHeaders();
    1.73    }
    1.74  
    1.75    /**
    1.76 -   * Reads from the given InputStream into a byte array.
    1.77 -   * TODO: Move this generalized method to org.sonews.util.io.Resource.
    1.78 +   * Reads from the given Message into a byte array.
    1.79     * @param in
    1.80     * @return
    1.81     * @throws IOException
    1.82     */
    1.83 -  private byte[] readContent(InputStream in)
    1.84 -    throws IOException
    1.85 +  private byte[] readContent(Message in)
    1.86 +    throws IOException, MessagingException
    1.87    {
    1.88      ByteArrayOutputStream out = new ByteArrayOutputStream();
    1.89 -
    1.90 -    int b = in.read();
    1.91 -    while(b >= 0)
    1.92 -    {
    1.93 -      out.write(b);
    1.94 -      b = in.read();
    1.95 -    }
    1.96 -
    1.97 +    in.writeTo(out);
    1.98      return out.toByteArray();
    1.99    }
   1.100  
   1.101 @@ -226,51 +186,6 @@
   1.102    {
   1.103      return body;
   1.104    }
   1.105 -
   1.106 -  /**
   1.107 -   * @return Charset of the body text
   1.108 -   */
   1.109 -  private Charset getBodyCharset()
   1.110 -  {
   1.111 -    // We expect something like 
   1.112 -    // Content-Type: text/plain; charset=ISO-8859-15
   1.113 -    String contentType = getHeader(Headers.CONTENT_TYPE)[0];
   1.114 -    int idxCharsetStart = contentType.indexOf("charset=") + "charset=".length();
   1.115 -    int idxCharsetEnd   = contentType.indexOf(";", idxCharsetStart);
   1.116 -    
   1.117 -    String charsetName = "UTF-8";
   1.118 -    if(idxCharsetStart >= 0 && idxCharsetStart < contentType.length())
   1.119 -    {
   1.120 -      if(idxCharsetEnd < 0)
   1.121 -      {
   1.122 -        charsetName = contentType.substring(idxCharsetStart);
   1.123 -      }
   1.124 -      else
   1.125 -      {
   1.126 -        charsetName = contentType.substring(idxCharsetStart, idxCharsetEnd);
   1.127 -      }
   1.128 -    }
   1.129 -    
   1.130 -    // Sometimes there are '"' around the name
   1.131 -    if(charsetName.length() > 2 &&
   1.132 -      charsetName.charAt(0) == '"' && charsetName.endsWith("\""))
   1.133 -    {
   1.134 -      charsetName = charsetName.substring(1, charsetName.length() - 2);
   1.135 -    }
   1.136 -    
   1.137 -    // Create charset
   1.138 -    Charset charset = Charset.forName("UTF-8"); // This MUST be supported by JVM
   1.139 -    try
   1.140 -    {
   1.141 -      charset = Charset.forName(charsetName);
   1.142 -    }
   1.143 -    catch(Exception ex)
   1.144 -    {
   1.145 -      Log.get().severe(ex.getMessage());
   1.146 -      Log.get().severe("Article.getBodyCharset(): Unknown charset: " + charsetName);
   1.147 -    }
   1.148 -    return charset;
   1.149 -  }
   1.150    
   1.151    /**
   1.152     * @return Numerical IDs of the newsgroups this Article belongs to.