org/sonews/storage/Article.java
author cli
Wed Aug 12 16:38:11 2009 +0200 (2009-08-12)
changeset 8 b62fe6ed39d3
child 12 bb6990c0dd1a
permissions -rw-r--r--
Disable strict parsing of email addresses.
chris@3
     1
/*
chris@3
     2
 *   SONEWS News Server
chris@3
     3
 *   see AUTHORS for the list of contributors
chris@3
     4
 *
chris@3
     5
 *   This program is free software: you can redistribute it and/or modify
chris@3
     6
 *   it under the terms of the GNU General Public License as published by
chris@3
     7
 *   the Free Software Foundation, either version 3 of the License, or
chris@3
     8
 *   (at your option) any later version.
chris@3
     9
 *
chris@3
    10
 *   This program is distributed in the hope that it will be useful,
chris@3
    11
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
chris@3
    12
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
chris@3
    13
 *   GNU General Public License for more details.
chris@3
    14
 *
chris@3
    15
 *   You should have received a copy of the GNU General Public License
chris@3
    16
 *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
chris@3
    17
 */
chris@3
    18
chris@3
    19
package org.sonews.storage;
chris@3
    20
chris@3
    21
import java.io.ByteArrayInputStream;
chris@3
    22
import java.io.ByteArrayOutputStream;
chris@3
    23
import java.io.IOException;
chris@3
    24
import java.io.InputStream;
chris@3
    25
import java.nio.charset.Charset;
chris@3
    26
import java.security.MessageDigest;
chris@3
    27
import java.security.NoSuchAlgorithmException;
chris@3
    28
import java.util.UUID;
chris@3
    29
import java.util.ArrayList;
chris@3
    30
import java.util.Enumeration;
chris@3
    31
import java.util.List;
chris@3
    32
import javax.mail.Header;
chris@3
    33
import javax.mail.Message;
chris@3
    34
import javax.mail.MessagingException;
chris@3
    35
import javax.mail.Multipart;
chris@3
    36
import javax.mail.internet.InternetHeaders;
chris@3
    37
import org.sonews.config.Config;
chris@3
    38
import org.sonews.util.Log;
chris@3
    39
chris@3
    40
/**
chris@3
    41
 * Represents a newsgroup article.
chris@3
    42
 * @author Christian Lins
chris@3
    43
 * @author Denis Schwerdel
chris@3
    44
 * @since n3tpd/0.1
chris@3
    45
 */
chris@3
    46
public class Article extends ArticleHead
chris@3
    47
{
chris@3
    48
  
chris@3
    49
  /**
chris@3
    50
   * Loads the Article identified by the given ID from the JDBCDatabase.
chris@3
    51
   * @param messageID
chris@3
    52
   * @return null if Article is not found or if an error occurred.
chris@3
    53
   */
chris@3
    54
  public static Article getByMessageID(final String messageID)
chris@3
    55
  {
chris@3
    56
    try
chris@3
    57
    {
chris@3
    58
      return StorageManager.current().getArticle(messageID);
chris@3
    59
    }
chris@3
    60
    catch(StorageBackendException ex)
chris@3
    61
    {
chris@3
    62
      ex.printStackTrace();
chris@3
    63
      return null;
chris@3
    64
    }
chris@3
    65
  }
chris@3
    66
  
chris@3
    67
  // Raw bytes of the article body; an empty array until one of the
  // constructors or setBody() assigns something else.
  private byte[] body       = new byte[0];
chris@3
    68
  
chris@3
    69
  /**
   * Creates an Article with a zero-length body.
   * Nothing else is initialized here; the header state is whatever the
   * ArticleHead superclass constructor sets up.
   */
  public Article()
  {
    super();
  }
chris@3
    75
  
chris@3
    76
  /**
chris@3
    77
   * Creates a new Article object using the date from the given
chris@3
    78
   * raw data.
chris@3
    79
   */
chris@3
    80
  public Article(String headers, byte[] body)
chris@3
    81
  {
chris@3
    82
    try
chris@3
    83
    {
chris@3
    84
      this.body  = body;
chris@3
    85
chris@3
    86
      // Parse the header
chris@3
    87
      this.headers = new InternetHeaders(
chris@3
    88
        new ByteArrayInputStream(headers.getBytes()));
chris@3
    89
      
chris@3
    90
      this.headerSrc = headers;
chris@3
    91
    }
chris@3
    92
    catch(MessagingException ex)
chris@3
    93
    {
chris@3
    94
      ex.printStackTrace();
chris@3
    95
    }
chris@3
    96
  }
chris@3
    97
chris@3
    98
  /**
   * Creates an Article instance using the data from the javax.mail.Message
   * object.
   * All headers of the message are copied. The body is taken from the
   * message content:
   * <ul>
   *  <li>String content is encoded with the charset declared in the copied
   *      Content-Type header (see getBodyCharset()), so that the stored
   *      bytes do not depend on the platform default charset,</li>
   *  <li>Multipart content is read completely from msg.getInputStream(),</li>
   *  <li>InputStream content is read completely from that stream.</li>
   * </ul>
   * Streams opened or received here are closed after reading. Finally
   * validateHeaders() is invoked.
   * @see javax.mail.Message
   * @param msg Message to take headers and body from
   * @throws IOException if the content cannot be read
   * @throws MessagingException if the message cannot be accessed or if its
   * content is of an unknown kind
   */
  public Article(final Message msg)
    throws IOException, MessagingException
  {
    this.headers = new InternetHeaders();

    // Enumeration<?> avoids the raw type and works regardless of whether
    // the mail API declares a raw or a parameterized Enumeration.
    for(Enumeration<?> e = msg.getAllHeaders(); e.hasMoreElements();)
    {
      final Header header = (Header)e.nextElement();
      this.headers.addHeader(header.getName(), header.getValue());
    }

    // The "content" of the message can be a String if it's a simple text/plain
    // message, a Multipart object or an InputStream if the content is unknown.
    final Object content = msg.getContent();
    if(content instanceof String)
    {
      // The headers are already copied, so the declared charset is available
      this.body = ((String)content).getBytes(getBodyCharset());
    }
    else if(content instanceof Multipart) // probably subclass MimeMultipart
    {
      // We are not interested in the different parts of the MultipartMessage,
      // so we simply read in all data which *can* be huge.
      this.body = readAndClose(msg.getInputStream());
    }
    else if(content instanceof InputStream)
    {
      // The message format is unknown to the Message class, but we can
      // simply read in the whole message data.
      this.body = readAndClose((InputStream)content);
    }
    else
    {
      // Unknown content is probably a malformed mail we should skip.
      // On the other hand we produce an inconsistent mail mirror, but no
      // mail system must transport invalid content.
      Log.msg("Skipping message due to unknown content. Throwing exception...", true);
      throw new MessagingException("Unknown content: " + content);
    }

    // Validate headers
    validateHeaders();
  }

  /**
   * Reads the given stream completely using readContent(InputStream) and
   * closes it afterwards, even if reading fails.
   * @param in stream to drain and close
   * @return all bytes read from the stream
   * @throws IOException if reading or closing fails
   */
  private byte[] readAndClose(final InputStream in)
    throws IOException
  {
    try
    {
      return readContent(in);
    }
    finally
    {
      in.close();
    }
  }
chris@3
   149
chris@3
   150
  /**
chris@3
   151
   * Reads from the given InputString into a byte array.
chris@3
   152
   * TODO: Move this generalized method to org.sonews.util.io.Resource.
chris@3
   153
   * @param in
chris@3
   154
   * @return
chris@3
   155
   * @throws IOException
chris@3
   156
   */
chris@3
   157
  private byte[] readContent(InputStream in)
chris@3
   158
    throws IOException
chris@3
   159
  {
chris@3
   160
    ByteArrayOutputStream out = new ByteArrayOutputStream();
chris@3
   161
chris@3
   162
    int b = in.read();
chris@3
   163
    while(b >= 0)
chris@3
   164
    {
chris@3
   165
      out.write(b);
chris@3
   166
      b = in.read();
chris@3
   167
    }
chris@3
   168
chris@3
   169
    return out.toByteArray();
chris@3
   170
  }
chris@3
   171
chris@3
   172
  /**
chris@3
   173
   * Removes the header identified by the given key.
chris@3
   174
   * @param headerKey
chris@3
   175
   */
chris@3
   176
  public void removeHeader(final String headerKey)
chris@3
   177
  {
chris@3
   178
    this.headers.removeHeader(headerKey);
chris@3
   179
    this.headerSrc = null;
chris@3
   180
  }
chris@3
   181
chris@3
   182
  /**
chris@3
   183
   * Generates a message id for this article and sets it into
chris@3
   184
   * the header object. You have to update the JDBCDatabase manually to make this
chris@3
   185
   * change persistent.
chris@3
   186
   * Note: a Message-ID should never be changed and only generated once.
chris@3
   187
   */
chris@3
   188
  private String generateMessageID()
chris@3
   189
  {
chris@3
   190
    String randomString;
chris@3
   191
    MessageDigest md5;
chris@3
   192
    try
chris@3
   193
    {
chris@3
   194
      md5 = MessageDigest.getInstance("MD5");
chris@3
   195
      md5.reset();
chris@3
   196
      md5.update(getBody());
chris@3
   197
      md5.update(getHeader(Headers.SUBJECT)[0].getBytes());
chris@3
   198
      md5.update(getHeader(Headers.FROM)[0].getBytes());
chris@3
   199
      byte[] result = md5.digest();
chris@3
   200
      StringBuffer hexString = new StringBuffer();
chris@3
   201
      for (int i = 0; i < result.length; i++)
chris@3
   202
      {
chris@3
   203
        hexString.append(Integer.toHexString(0xFF & result[i]));
chris@3
   204
      }
chris@3
   205
      randomString = hexString.toString();
chris@3
   206
    }
chris@3
   207
    catch (NoSuchAlgorithmException e)
chris@3
   208
    {
chris@3
   209
      e.printStackTrace();
chris@3
   210
      randomString = UUID.randomUUID().toString();
chris@3
   211
    }
chris@3
   212
    String msgID = "<" + randomString + "@"
chris@3
   213
        + Config.inst().get(Config.HOSTNAME, "localhost") + ">";
chris@3
   214
    
chris@3
   215
    this.headers.setHeader(Headers.MESSAGE_ID, msgID);
chris@3
   216
    
chris@3
   217
    return msgID;
chris@3
   218
  }
chris@3
   219
chris@3
   220
  /**
chris@3
   221
   * Returns the body string.
chris@3
   222
   */
chris@3
   223
  public byte[] getBody()
chris@3
   224
  {
chris@3
   225
    return body;
chris@3
   226
  }
chris@3
   227
chris@3
   228
  /**
chris@3
   229
   * @return Charset of the body text
chris@3
   230
   */
chris@3
   231
  private Charset getBodyCharset()
chris@3
   232
  {
chris@3
   233
    // We espect something like 
chris@3
   234
    // Content-Type: text/plain; charset=ISO-8859-15
chris@3
   235
    String contentType = getHeader(Headers.CONTENT_TYPE)[0];
chris@3
   236
    int idxCharsetStart = contentType.indexOf("charset=") + "charset=".length();
chris@3
   237
    int idxCharsetEnd   = contentType.indexOf(";", idxCharsetStart);
chris@3
   238
    
chris@3
   239
    String charsetName = "UTF-8";
chris@3
   240
    if(idxCharsetStart >= 0 && idxCharsetStart < contentType.length())
chris@3
   241
    {
chris@3
   242
      if(idxCharsetEnd < 0)
chris@3
   243
      {
chris@3
   244
        charsetName = contentType.substring(idxCharsetStart);
chris@3
   245
      }
chris@3
   246
      else
chris@3
   247
      {
chris@3
   248
        charsetName = contentType.substring(idxCharsetStart, idxCharsetEnd);
chris@3
   249
      }
chris@3
   250
    }
chris@3
   251
    
chris@3
   252
    // Sometimes there are '"' around the name
chris@3
   253
    if(charsetName.length() > 2 &&
chris@3
   254
      charsetName.charAt(0) == '"' && charsetName.endsWith("\""))
chris@3
   255
    {
chris@3
   256
      charsetName = charsetName.substring(1, charsetName.length() - 2);
chris@3
   257
    }
chris@3
   258
    
chris@3
   259
    // Create charset
chris@3
   260
    Charset charset = Charset.forName("UTF-8"); // This MUST be supported by JVM
chris@3
   261
    try
chris@3
   262
    {
chris@3
   263
      charset = Charset.forName(charsetName);
chris@3
   264
    }
chris@3
   265
    catch(Exception ex)
chris@3
   266
    {
chris@3
   267
      Log.msg(ex.getMessage(), false);
chris@3
   268
      Log.msg("Article.getBodyCharset(): Unknown charset: " + charsetName, false);
chris@3
   269
    }
chris@3
   270
    return charset;
chris@3
   271
  }
chris@3
   272
  
chris@3
   273
  /**
chris@3
   274
   * @return Numerical IDs of the newsgroups this Article belongs to.
chris@3
   275
   */
chris@3
   276
  public List<Group> getGroups()
chris@3
   277
  {
chris@3
   278
    String[]         groupnames = getHeader(Headers.NEWSGROUPS)[0].split(",");
chris@3
   279
    ArrayList<Group> groups     = new ArrayList<Group>();
chris@3
   280
chris@3
   281
    try
chris@3
   282
    {
chris@3
   283
      for(String newsgroup : groupnames)
chris@3
   284
      {
chris@3
   285
        newsgroup = newsgroup.trim();
chris@3
   286
        Group group = StorageManager.current().getGroup(newsgroup);
chris@3
   287
        if(group != null &&         // If the server does not provide the group, ignore it
chris@3
   288
          !groups.contains(group))  // Yes, there may be duplicates
chris@3
   289
        {
chris@3
   290
          groups.add(group);
chris@3
   291
        }
chris@3
   292
      }
chris@3
   293
    }
chris@3
   294
    catch(StorageBackendException ex)
chris@3
   295
    {
chris@3
   296
      ex.printStackTrace();
chris@3
   297
      return null;
chris@3
   298
    }
chris@3
   299
    return groups;
chris@3
   300
  }
chris@3
   301
chris@3
   302
  public void setBody(byte[] body)
chris@3
   303
  {
chris@3
   304
    this.body = body;
chris@3
   305
  }
chris@3
   306
  
chris@3
   307
  /**
chris@3
   308
   * 
chris@3
   309
   * @param groupname Name(s) of newsgroups
chris@3
   310
   */
chris@3
   311
  public void setGroup(String groupname)
chris@3
   312
  {
chris@3
   313
    this.headers.setHeader(Headers.NEWSGROUPS, groupname);
chris@3
   314
  }
chris@3
   315
chris@3
   316
  /**
chris@3
   317
   * Returns the Message-ID of this Article. If the appropriate header
chris@3
   318
   * is empty, a new Message-ID is created.
chris@3
   319
   * @return Message-ID of this Article.
chris@3
   320
   */
chris@3
   321
  public String getMessageID()
chris@3
   322
  {
chris@3
   323
    String[] msgID = getHeader(Headers.MESSAGE_ID);
chris@3
   324
    return msgID[0].equals("") ? generateMessageID() : msgID[0];
chris@3
   325
  }
chris@3
   326
  
chris@3
   327
  /**
chris@3
   328
   * @return String containing the Message-ID.
chris@3
   329
   */
chris@3
   330
  @Override
chris@3
   331
  public String toString()
chris@3
   332
  {
chris@3
   333
    return getMessageID();
chris@3
   334
  }
chris@3
   335
chris@3
   336
}