org/sonews/storage/Article.java
author cli
Wed Aug 12 16:38:11 2009 +0200 (2009-08-12)
changeset 8 b62fe6ed39d3
child 12 bb6990c0dd1a
permissions -rw-r--r--
Disable strict parsing of email addresses.
     1 /*
     2  *   SONEWS News Server
     3  *   see AUTHORS for the list of contributors
     4  *
     5  *   This program is free software: you can redistribute it and/or modify
     6  *   it under the terms of the GNU General Public License as published by
     7  *   the Free Software Foundation, either version 3 of the License, or
     8  *   (at your option) any later version.
     9  *
    10  *   This program is distributed in the hope that it will be useful,
    11  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
    12  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    13  *   GNU General Public License for more details.
    14  *
    15  *   You should have received a copy of the GNU General Public License
    16  *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
    17  */
    18 
    19 package org.sonews.storage;
    20 
    21 import java.io.ByteArrayInputStream;
    22 import java.io.ByteArrayOutputStream;
    23 import java.io.IOException;
    24 import java.io.InputStream;
    25 import java.nio.charset.Charset;
    26 import java.security.MessageDigest;
    27 import java.security.NoSuchAlgorithmException;
    28 import java.util.UUID;
    29 import java.util.ArrayList;
    30 import java.util.Enumeration;
    31 import java.util.List;
    32 import javax.mail.Header;
    33 import javax.mail.Message;
    34 import javax.mail.MessagingException;
    35 import javax.mail.Multipart;
    36 import javax.mail.internet.InternetHeaders;
    37 import org.sonews.config.Config;
    38 import org.sonews.util.Log;
    39 
    40 /**
    41  * Represents a newsgroup article.
    42  * @author Christian Lins
    43  * @author Denis Schwerdel
    44  * @since n3tpd/0.1
    45  */
    46 public class Article extends ArticleHead
    47 {
    48   
    49   /**
    50    * Loads the Article identified by the given ID from the JDBCDatabase.
    51    * @param messageID
    52    * @return null if Article is not found or if an error occurred.
    53    */
    54   public static Article getByMessageID(final String messageID)
    55   {
    56     try
    57     {
    58       return StorageManager.current().getArticle(messageID);
    59     }
    60     catch(StorageBackendException ex)
    61     {
    62       ex.printStackTrace();
    63       return null;
    64     }
    65   }
    66   
    67   private byte[] body       = new byte[0];
    68   
    69   /**
    70    * Default constructor.
    71    */
    72   public Article()
    73   {
    74   }
    75   
    76   /**
    77    * Creates a new Article object using the date from the given
    78    * raw data.
    79    */
    80   public Article(String headers, byte[] body)
    81   {
    82     try
    83     {
    84       this.body  = body;
    85 
    86       // Parse the header
    87       this.headers = new InternetHeaders(
    88         new ByteArrayInputStream(headers.getBytes()));
    89       
    90       this.headerSrc = headers;
    91     }
    92     catch(MessagingException ex)
    93     {
    94       ex.printStackTrace();
    95     }
    96   }
    97 
    98   /**
    99    * Creates an Article instance using the data from the javax.mail.Message
   100    * object.
   101    * @see javax.mail.Message
   102    * @param msg
   103    * @throws IOException
   104    * @throws MessagingException
   105    */
   106   public Article(final Message msg)
   107     throws IOException, MessagingException
   108   {
   109     this.headers = new InternetHeaders();
   110 
   111     for(Enumeration e = msg.getAllHeaders() ; e.hasMoreElements();) 
   112     {
   113       final Header header = (Header)e.nextElement();
   114       this.headers.addHeader(header.getName(), header.getValue());
   115     }
   116     
   117     // The "content" of the message can be a String if it's a simple text/plain
   118     // message, a Multipart object or an InputStream if the content is unknown.
   119     final Object content = msg.getContent();
   120     if(content instanceof String)
   121     {
   122       this.body = ((String)content).getBytes();
   123     }
   124     else if(content instanceof Multipart) // probably subclass MimeMultipart
   125     {
   126       // We're are not interested in the different parts of the MultipartMessage,
   127       // so we simply read in all data which *can* be huge.
   128       InputStream in = msg.getInputStream();
   129       this.body = readContent(in);
   130     }
   131     else if(content instanceof InputStream)
   132     {
   133       // The message format is unknown to the Message class, but we can
   134       // simply read in the whole message data.
   135       this.body = readContent((InputStream)content);
   136     }
   137     else
   138     {
   139       // Unknown content is probably a malformed mail we should skip.
   140       // On the other hand we produce an inconsistent mail mirror, but no
   141       // mail system must transport invalid content.
   142       Log.msg("Skipping message due to unknown content. Throwing exception...", true);
   143       throw new MessagingException("Unknown content: " + content);
   144     }
   145     
   146     // Validate headers
   147     validateHeaders();
   148   }
   149 
   150   /**
   151    * Reads from the given InputString into a byte array.
   152    * TODO: Move this generalized method to org.sonews.util.io.Resource.
   153    * @param in
   154    * @return
   155    * @throws IOException
   156    */
   157   private byte[] readContent(InputStream in)
   158     throws IOException
   159   {
   160     ByteArrayOutputStream out = new ByteArrayOutputStream();
   161 
   162     int b = in.read();
   163     while(b >= 0)
   164     {
   165       out.write(b);
   166       b = in.read();
   167     }
   168 
   169     return out.toByteArray();
   170   }
   171 
   172   /**
   173    * Removes the header identified by the given key.
   174    * @param headerKey
   175    */
   176   public void removeHeader(final String headerKey)
   177   {
   178     this.headers.removeHeader(headerKey);
   179     this.headerSrc = null;
   180   }
   181 
   182   /**
   183    * Generates a message id for this article and sets it into
   184    * the header object. You have to update the JDBCDatabase manually to make this
   185    * change persistent.
   186    * Note: a Message-ID should never be changed and only generated once.
   187    */
   188   private String generateMessageID()
   189   {
   190     String randomString;
   191     MessageDigest md5;
   192     try
   193     {
   194       md5 = MessageDigest.getInstance("MD5");
   195       md5.reset();
   196       md5.update(getBody());
   197       md5.update(getHeader(Headers.SUBJECT)[0].getBytes());
   198       md5.update(getHeader(Headers.FROM)[0].getBytes());
   199       byte[] result = md5.digest();
   200       StringBuffer hexString = new StringBuffer();
   201       for (int i = 0; i < result.length; i++)
   202       {
   203         hexString.append(Integer.toHexString(0xFF & result[i]));
   204       }
   205       randomString = hexString.toString();
   206     }
   207     catch (NoSuchAlgorithmException e)
   208     {
   209       e.printStackTrace();
   210       randomString = UUID.randomUUID().toString();
   211     }
   212     String msgID = "<" + randomString + "@"
   213         + Config.inst().get(Config.HOSTNAME, "localhost") + ">";
   214     
   215     this.headers.setHeader(Headers.MESSAGE_ID, msgID);
   216     
   217     return msgID;
   218   }
   219 
   220   /**
   221    * Returns the body string.
   222    */
   223   public byte[] getBody()
   224   {
   225     return body;
   226   }
   227 
   228   /**
   229    * @return Charset of the body text
   230    */
   231   private Charset getBodyCharset()
   232   {
   233     // We espect something like 
   234     // Content-Type: text/plain; charset=ISO-8859-15
   235     String contentType = getHeader(Headers.CONTENT_TYPE)[0];
   236     int idxCharsetStart = contentType.indexOf("charset=") + "charset=".length();
   237     int idxCharsetEnd   = contentType.indexOf(";", idxCharsetStart);
   238     
   239     String charsetName = "UTF-8";
   240     if(idxCharsetStart >= 0 && idxCharsetStart < contentType.length())
   241     {
   242       if(idxCharsetEnd < 0)
   243       {
   244         charsetName = contentType.substring(idxCharsetStart);
   245       }
   246       else
   247       {
   248         charsetName = contentType.substring(idxCharsetStart, idxCharsetEnd);
   249       }
   250     }
   251     
   252     // Sometimes there are '"' around the name
   253     if(charsetName.length() > 2 &&
   254       charsetName.charAt(0) == '"' && charsetName.endsWith("\""))
   255     {
   256       charsetName = charsetName.substring(1, charsetName.length() - 2);
   257     }
   258     
   259     // Create charset
   260     Charset charset = Charset.forName("UTF-8"); // This MUST be supported by JVM
   261     try
   262     {
   263       charset = Charset.forName(charsetName);
   264     }
   265     catch(Exception ex)
   266     {
   267       Log.msg(ex.getMessage(), false);
   268       Log.msg("Article.getBodyCharset(): Unknown charset: " + charsetName, false);
   269     }
   270     return charset;
   271   }
   272   
   273   /**
   274    * @return Numerical IDs of the newsgroups this Article belongs to.
   275    */
   276   public List<Group> getGroups()
   277   {
   278     String[]         groupnames = getHeader(Headers.NEWSGROUPS)[0].split(",");
   279     ArrayList<Group> groups     = new ArrayList<Group>();
   280 
   281     try
   282     {
   283       for(String newsgroup : groupnames)
   284       {
   285         newsgroup = newsgroup.trim();
   286         Group group = StorageManager.current().getGroup(newsgroup);
   287         if(group != null &&         // If the server does not provide the group, ignore it
   288           !groups.contains(group))  // Yes, there may be duplicates
   289         {
   290           groups.add(group);
   291         }
   292       }
   293     }
   294     catch(StorageBackendException ex)
   295     {
   296       ex.printStackTrace();
   297       return null;
   298     }
   299     return groups;
   300   }
   301 
   302   public void setBody(byte[] body)
   303   {
   304     this.body = body;
   305   }
   306   
   307   /**
   308    * 
   309    * @param groupname Name(s) of newsgroups
   310    */
   311   public void setGroup(String groupname)
   312   {
   313     this.headers.setHeader(Headers.NEWSGROUPS, groupname);
   314   }
   315 
   316   /**
   317    * Returns the Message-ID of this Article. If the appropriate header
   318    * is empty, a new Message-ID is created.
   319    * @return Message-ID of this Article.
   320    */
   321   public String getMessageID()
   322   {
   323     String[] msgID = getHeader(Headers.MESSAGE_ID);
   324     return msgID[0].equals("") ? generateMessageID() : msgID[0];
   325   }
   326   
   327   /**
   328    * @return String containing the Message-ID.
   329    */
   330   @Override
   331   public String toString()
   332   {
   333     return getMessageID();
   334   }
   335 
   336 }