org/sonews/storage/Article.java
author cli
Fri Aug 21 17:33:15 2009 +0200 (2009-08-21)
changeset 18 7e527fdf0fa8
parent 12 bb6990c0dd1a
child 33 f9bf183447d1
permissions -rw-r--r--
Fix for #549.
     1 /*
     2  *   SONEWS News Server
     3  *   see AUTHORS for the list of contributors
     4  *
     5  *   This program is free software: you can redistribute it and/or modify
     6  *   it under the terms of the GNU General Public License as published by
     7  *   the Free Software Foundation, either version 3 of the License, or
     8  *   (at your option) any later version.
     9  *
    10  *   This program is distributed in the hope that it will be useful,
    11  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
    12  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    13  *   GNU General Public License for more details.
    14  *
    15  *   You should have received a copy of the GNU General Public License
    16  *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
    17  */
    18 
    19 package org.sonews.storage;
    20 
    21 import java.io.ByteArrayInputStream;
    22 import java.io.ByteArrayOutputStream;
    23 import java.io.IOException;
    24 import java.io.InputStream;
    25 import java.nio.charset.Charset;
    26 import java.security.MessageDigest;
    27 import java.security.NoSuchAlgorithmException;
    28 import java.util.UUID;
    29 import java.util.ArrayList;
    30 import java.util.Enumeration;
    31 import java.util.List;
    32 import javax.mail.Header;
    33 import javax.mail.Message;
    34 import javax.mail.MessagingException;
    35 import javax.mail.Multipart;
    36 import javax.mail.internet.InternetHeaders;
    37 import org.sonews.config.Config;
    38 import org.sonews.util.Log;
    39 
    40 /**
    41  * Represents a newsgroup article.
    42  * @author Christian Lins
    43  * @author Denis Schwerdel
    44  * @since n3tpd/0.1
    45  */
    46 public class Article extends ArticleHead
    47 {
    48   
    49   /**
    50    * Loads the Article identified by the given ID from the JDBCDatabase.
    51    * @param messageID
    52    * @return null if Article is not found or if an error occurred.
    53    */
    54   public static Article getByMessageID(final String messageID)
    55   {
    56     try
    57     {
    58       return StorageManager.current().getArticle(messageID);
    59     }
    60     catch(StorageBackendException ex)
    61     {
    62       ex.printStackTrace();
    63       return null;
    64     }
    65   }
    66   
    67   private byte[] body       = new byte[0];
    68   
    69   /**
    70    * Default constructor.
    71    */
    72   public Article()
    73   {
    74   }
    75   
    76   /**
    77    * Creates a new Article object using the date from the given
    78    * raw data.
    79    */
    80   public Article(String headers, byte[] body)
    81   {
    82     try
    83     {
    84       this.body  = body;
    85 
    86       // Parse the header
    87       this.headers = new InternetHeaders(
    88         new ByteArrayInputStream(headers.getBytes()));
    89       
    90       this.headerSrc = headers;
    91     }
    92     catch(MessagingException ex)
    93     {
    94       ex.printStackTrace();
    95     }
    96   }
    97 
    98   /**
    99    * Creates an Article instance using the data from the javax.mail.Message
   100    * object.
   101    * @see javax.mail.Message
   102    * @param msg
   103    * @throws IOException
   104    * @throws MessagingException
   105    */
   106   public Article(final Message msg)
   107     throws IOException, MessagingException
   108   {
   109     this.headers = new InternetHeaders();
   110 
   111     for(Enumeration e = msg.getAllHeaders() ; e.hasMoreElements();) 
   112     {
   113       final Header header = (Header)e.nextElement();
   114       this.headers.addHeader(header.getName(), header.getValue());
   115     }
   116     
   117     // The "content" of the message can be a String if it's a simple text/plain
   118     // message, a Multipart object or an InputStream if the content is unknown.
   119     final Object content = msg.getContent();
   120     if(content instanceof String)
   121     {
   122       this.body = ((String)content).getBytes(getBodyCharset());
   123     }
   124     else if(content instanceof Multipart) // probably subclass MimeMultipart
   125     {
   126       // We're are not interested in the different parts of the MultipartMessage,
   127       // so we simply read in all data which *can* be huge.
   128       InputStream in = msg.getInputStream();
   129       this.body = readContent(in);
   130     }
   131     else if(content instanceof InputStream)
   132     {
   133       // The message format is unknown to the Message class, but we can
   134       // simply read in the whole message data.
   135       this.body = readContent((InputStream)content);
   136     }
   137     else
   138     {
   139       // Unknown content is probably a malformed mail we should skip.
   140       // On the other hand we produce an inconsistent mail mirror, but no
   141       // mail system must transport invalid content.
   142       Log.get().severe("Skipping message due to unknown content. Throwing exception...");
   143       MessagingException ex = new MessagingException("Unknown content: " + content);
   144       Log.get().throwing("Article.java", "<init>", ex);
   145       throw ex;
   146     }
   147     
   148     // Validate headers
   149     validateHeaders();
   150   }
   151 
   152   /**
   153    * Reads from the given InputString into a byte array.
   154    * TODO: Move this generalized method to org.sonews.util.io.Resource.
   155    * @param in
   156    * @return
   157    * @throws IOException
   158    */
   159   private byte[] readContent(InputStream in)
   160     throws IOException
   161   {
   162     ByteArrayOutputStream out = new ByteArrayOutputStream();
   163 
   164     int b = in.read();
   165     while(b >= 0)
   166     {
   167       out.write(b);
   168       b = in.read();
   169     }
   170 
   171     return out.toByteArray();
   172   }
   173 
   174   /**
   175    * Removes the header identified by the given key.
   176    * @param headerKey
   177    */
   178   public void removeHeader(final String headerKey)
   179   {
   180     this.headers.removeHeader(headerKey);
   181     this.headerSrc = null;
   182   }
   183 
   184   /**
   185    * Generates a message id for this article and sets it into
   186    * the header object. You have to update the JDBCDatabase manually to make this
   187    * change persistent.
   188    * Note: a Message-ID should never be changed and only generated once.
   189    */
   190   private String generateMessageID()
   191   {
   192     String randomString;
   193     MessageDigest md5;
   194     try
   195     {
   196       md5 = MessageDigest.getInstance("MD5");
   197       md5.reset();
   198       md5.update(getBody());
   199       md5.update(getHeader(Headers.SUBJECT)[0].getBytes());
   200       md5.update(getHeader(Headers.FROM)[0].getBytes());
   201       byte[] result = md5.digest();
   202       StringBuffer hexString = new StringBuffer();
   203       for (int i = 0; i < result.length; i++)
   204       {
   205         hexString.append(Integer.toHexString(0xFF & result[i]));
   206       }
   207       randomString = hexString.toString();
   208     }
   209     catch (NoSuchAlgorithmException e)
   210     {
   211       e.printStackTrace();
   212       randomString = UUID.randomUUID().toString();
   213     }
   214     String msgID = "<" + randomString + "@"
   215         + Config.inst().get(Config.HOSTNAME, "localhost") + ">";
   216     
   217     this.headers.setHeader(Headers.MESSAGE_ID, msgID);
   218     
   219     return msgID;
   220   }
   221 
   222   /**
   223    * Returns the body string.
   224    */
   225   public byte[] getBody()
   226   {
   227     return body;
   228   }
   229 
   230   /**
   231    * @return Charset of the body text
   232    */
   233   private Charset getBodyCharset()
   234   {
   235     // We espect something like 
   236     // Content-Type: text/plain; charset=ISO-8859-15
   237     String contentType = getHeader(Headers.CONTENT_TYPE)[0];
   238     int idxCharsetStart = contentType.indexOf("charset=") + "charset=".length();
   239     int idxCharsetEnd   = contentType.indexOf(";", idxCharsetStart);
   240     
   241     String charsetName = "UTF-8";
   242     if(idxCharsetStart >= 0 && idxCharsetStart < contentType.length())
   243     {
   244       if(idxCharsetEnd < 0)
   245       {
   246         charsetName = contentType.substring(idxCharsetStart);
   247       }
   248       else
   249       {
   250         charsetName = contentType.substring(idxCharsetStart, idxCharsetEnd);
   251       }
   252     }
   253     
   254     // Sometimes there are '"' around the name
   255     if(charsetName.length() > 2 &&
   256       charsetName.charAt(0) == '"' && charsetName.endsWith("\""))
   257     {
   258       charsetName = charsetName.substring(1, charsetName.length() - 2);
   259     }
   260     
   261     // Create charset
   262     Charset charset = Charset.forName("UTF-8"); // This MUST be supported by JVM
   263     try
   264     {
   265       charset = Charset.forName(charsetName);
   266     }
   267     catch(Exception ex)
   268     {
   269       Log.get().severe(ex.getMessage());
   270       Log.get().severe("Article.getBodyCharset(): Unknown charset: " + charsetName);
   271     }
   272     return charset;
   273   }
   274   
   275   /**
   276    * @return Numerical IDs of the newsgroups this Article belongs to.
   277    */
   278   public List<Group> getGroups()
   279   {
   280     String[]         groupnames = getHeader(Headers.NEWSGROUPS)[0].split(",");
   281     ArrayList<Group> groups     = new ArrayList<Group>();
   282 
   283     try
   284     {
   285       for(String newsgroup : groupnames)
   286       {
   287         newsgroup = newsgroup.trim();
   288         Group group = StorageManager.current().getGroup(newsgroup);
   289         if(group != null &&         // If the server does not provide the group, ignore it
   290           !groups.contains(group))  // Yes, there may be duplicates
   291         {
   292           groups.add(group);
   293         }
   294       }
   295     }
   296     catch(StorageBackendException ex)
   297     {
   298       ex.printStackTrace();
   299       return null;
   300     }
   301     return groups;
   302   }
   303 
   304   public void setBody(byte[] body)
   305   {
   306     this.body = body;
   307   }
   308   
   309   /**
   310    * 
   311    * @param groupname Name(s) of newsgroups
   312    */
   313   public void setGroup(String groupname)
   314   {
   315     this.headers.setHeader(Headers.NEWSGROUPS, groupname);
   316   }
   317 
   318   /**
   319    * Returns the Message-ID of this Article. If the appropriate header
   320    * is empty, a new Message-ID is created.
   321    * @return Message-ID of this Article.
   322    */
   323   public String getMessageID()
   324   {
   325     String[] msgID = getHeader(Headers.MESSAGE_ID);
   326     return msgID[0].equals("") ? generateMessageID() : msgID[0];
   327   }
   328   
   329   /**
   330    * @return String containing the Message-ID.
   331    */
   332   @Override
   333   public String toString()
   334   {
   335     return getMessageID();
   336   }
   337 
   338 }