org/sonews/storage/Article.java
author cli
Thu Aug 20 21:31:03 2009 +0200 (2009-08-20)
changeset 16 5a4a41cfc0a3
parent 12 bb6990c0dd1a
child 33 f9bf183447d1
permissions -rw-r--r--
Issue #538 fixed.
chris@3
     1
/*
chris@3
     2
 *   SONEWS News Server
chris@3
     3
 *   see AUTHORS for the list of contributors
chris@3
     4
 *
chris@3
     5
 *   This program is free software: you can redistribute it and/or modify
chris@3
     6
 *   it under the terms of the GNU General Public License as published by
chris@3
     7
 *   the Free Software Foundation, either version 3 of the License, or
chris@3
     8
 *   (at your option) any later version.
chris@3
     9
 *
chris@3
    10
 *   This program is distributed in the hope that it will be useful,
chris@3
    11
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
chris@3
    12
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
chris@3
    13
 *   GNU General Public License for more details.
chris@3
    14
 *
chris@3
    15
 *   You should have received a copy of the GNU General Public License
chris@3
    16
 *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
chris@3
    17
 */
chris@3
    18
chris@3
    19
package org.sonews.storage;
chris@3
    20
chris@3
    21
import java.io.ByteArrayInputStream;
chris@3
    22
import java.io.ByteArrayOutputStream;
chris@3
    23
import java.io.IOException;
chris@3
    24
import java.io.InputStream;
chris@3
    25
import java.nio.charset.Charset;
chris@3
    26
import java.security.MessageDigest;
chris@3
    27
import java.security.NoSuchAlgorithmException;
chris@3
    28
import java.util.UUID;
chris@3
    29
import java.util.ArrayList;
chris@3
    30
import java.util.Enumeration;
chris@3
    31
import java.util.List;
chris@3
    32
import javax.mail.Header;
chris@3
    33
import javax.mail.Message;
chris@3
    34
import javax.mail.MessagingException;
chris@3
    35
import javax.mail.Multipart;
chris@3
    36
import javax.mail.internet.InternetHeaders;
chris@3
    37
import org.sonews.config.Config;
chris@3
    38
import org.sonews.util.Log;
chris@3
    39
chris@3
    40
/**
chris@3
    41
 * Represents a newsgroup article.
chris@3
    42
 * @author Christian Lins
chris@3
    43
 * @author Denis Schwerdel
chris@3
    44
 * @since n3tpd/0.1
chris@3
    45
 */
chris@3
    46
public class Article extends ArticleHead
chris@3
    47
{
chris@3
    48
  
chris@3
    49
  /**
chris@3
    50
   * Loads the Article identified by the given ID from the JDBCDatabase.
chris@3
    51
   * @param messageID
chris@3
    52
   * @return null if Article is not found or if an error occurred.
chris@3
    53
   */
chris@3
    54
  public static Article getByMessageID(final String messageID)
chris@3
    55
  {
chris@3
    56
    try
chris@3
    57
    {
chris@3
    58
      return StorageManager.current().getArticle(messageID);
chris@3
    59
    }
chris@3
    60
    catch(StorageBackendException ex)
chris@3
    61
    {
chris@3
    62
      ex.printStackTrace();
chris@3
    63
      return null;
chris@3
    64
    }
chris@3
    65
  }
chris@3
    66
  
chris@3
    67
  /** Raw body bytes of this article; starts out as an empty array. */
  private byte[] body       = new byte[0];
chris@3
    68
  
chris@3
    69
  /**
   * Creates an Article without touching any field: the body keeps its
   * initial empty byte array and nothing is parsed.
   */
  public Article()
  {
    super();
  }
chris@3
    75
  
chris@3
    76
  /**
   * Creates a new Article object using the data from the given
   * raw header text and body bytes.
   * If the header text cannot be parsed, the problem is logged and the
   * header fields of this object are left as they were; no exception is
   * propagated to the caller.
   * @param headers Raw header block of the article.
   * @param body Raw body bytes; the array reference is stored as is.
   */
  public Article(String headers, byte[] body)
  {
    this.body = body;

    try
    {
      // Parse the header.
      // NOTE(review): getBytes() encodes with the platform default charset;
      // confirm this is intended for headers containing non-ASCII characters.
      this.headers = new InternetHeaders(
        new ByteArrayInputStream(headers.getBytes()));

      // Keep the original text; it is only stored when parsing succeeded.
      this.headerSrc = headers;
    }
    catch(MessagingException ex)
    {
      // Use the server log instead of printing the trace to stderr.
      Log.get().severe("Article.<init>: unable to parse headers: " + ex);
      Log.get().throwing("Article.java", "<init>", ex);
    }
  }
chris@3
    97
chris@3
    98
  /**
   * Creates an Article instance using the data from the javax.mail.Message
   * object. All headers of the message are copied, the content is converted
   * into the body byte array and finally validateHeaders() is called.
   * @see javax.mail.Message
   * @param msg Message to take headers and content from.
   * @throws IOException If the message content cannot be read.
   * @throws MessagingException If the message cannot be accessed or its
   * content is of an unknown kind.
   */
  public Article(final Message msg)
    throws IOException, MessagingException
  {
    this.headers = new InternetHeaders();

    for(Enumeration<?> e = msg.getAllHeaders(); e.hasMoreElements();)
    {
      final Header header = (Header)e.nextElement();
      this.headers.addHeader(header.getName(), header.getValue());
    }

    // The "content" of the message can be a String if it's a simple text/plain
    // message, a Multipart object or an InputStream if the content is unknown.
    final Object content = msg.getContent();
    if(content instanceof String)
    {
      this.body = ((String)content).getBytes(getBodyCharset());
    }
    else if(content instanceof Multipart) // probably subclass MimeMultipart
    {
      // We are not interested in the different parts of the MultipartMessage,
      // so we simply read in all data which *can* be huge.
      this.body = readAndClose(msg.getInputStream());
    }
    else if(content instanceof InputStream)
    {
      // The message format is unknown to the Message class, but we can
      // simply read in the whole message data.
      this.body = readAndClose((InputStream)content);
    }
    else
    {
      // Unknown content is probably a malformed mail we should skip.
      // On the other hand we produce an inconsistent mail mirror, but no
      // mail system must transport invalid content.
      Log.get().severe("Skipping message due to unknown content. Throwing exception...");
      MessagingException ex = new MessagingException("Unknown content: " + content);
      Log.get().throwing("Article.java", "<init>", ex);
      throw ex;
    }

    // Validate headers
    validateHeaders();
  }

  /**
   * Reads the given stream completely via readContent() and closes it
   * afterwards, even if reading fails, so that the stream is not leaked.
   * @param in Stream to drain and close.
   * @return All bytes read from the stream.
   * @throws IOException If reading or closing fails.
   */
  private byte[] readAndClose(final InputStream in)
    throws IOException
  {
    try
    {
      return readContent(in);
    }
    finally
    {
      in.close();
    }
  }
chris@3
   151
chris@3
   152
  /**
chris@3
   153
   * Reads from the given InputString into a byte array.
chris@3
   154
   * TODO: Move this generalized method to org.sonews.util.io.Resource.
chris@3
   155
   * @param in
chris@3
   156
   * @return
chris@3
   157
   * @throws IOException
chris@3
   158
   */
chris@3
   159
  private byte[] readContent(InputStream in)
chris@3
   160
    throws IOException
chris@3
   161
  {
chris@3
   162
    ByteArrayOutputStream out = new ByteArrayOutputStream();
chris@3
   163
chris@3
   164
    int b = in.read();
chris@3
   165
    while(b >= 0)
chris@3
   166
    {
chris@3
   167
      out.write(b);
chris@3
   168
      b = in.read();
chris@3
   169
    }
chris@3
   170
chris@3
   171
    return out.toByteArray();
chris@3
   172
  }
chris@3
   173
chris@3
   174
  /**
chris@3
   175
   * Removes the header identified by the given key.
chris@3
   176
   * @param headerKey
chris@3
   177
   */
chris@3
   178
  public void removeHeader(final String headerKey)
chris@3
   179
  {
chris@3
   180
    this.headers.removeHeader(headerKey);
chris@3
   181
    this.headerSrc = null;
chris@3
   182
  }
chris@3
   183
chris@3
   184
  /**
chris@3
   185
   * Generates a message id for this article and sets it into
chris@3
   186
   * the header object. You have to update the JDBCDatabase manually to make this
chris@3
   187
   * change persistent.
chris@3
   188
   * Note: a Message-ID should never be changed and only generated once.
chris@3
   189
   */
chris@3
   190
  private String generateMessageID()
chris@3
   191
  {
chris@3
   192
    String randomString;
chris@3
   193
    MessageDigest md5;
chris@3
   194
    try
chris@3
   195
    {
chris@3
   196
      md5 = MessageDigest.getInstance("MD5");
chris@3
   197
      md5.reset();
chris@3
   198
      md5.update(getBody());
chris@3
   199
      md5.update(getHeader(Headers.SUBJECT)[0].getBytes());
chris@3
   200
      md5.update(getHeader(Headers.FROM)[0].getBytes());
chris@3
   201
      byte[] result = md5.digest();
chris@3
   202
      StringBuffer hexString = new StringBuffer();
chris@3
   203
      for (int i = 0; i < result.length; i++)
chris@3
   204
      {
chris@3
   205
        hexString.append(Integer.toHexString(0xFF & result[i]));
chris@3
   206
      }
chris@3
   207
      randomString = hexString.toString();
chris@3
   208
    }
chris@3
   209
    catch (NoSuchAlgorithmException e)
chris@3
   210
    {
chris@3
   211
      e.printStackTrace();
chris@3
   212
      randomString = UUID.randomUUID().toString();
chris@3
   213
    }
chris@3
   214
    String msgID = "<" + randomString + "@"
chris@3
   215
        + Config.inst().get(Config.HOSTNAME, "localhost") + ">";
chris@3
   216
    
chris@3
   217
    this.headers.setHeader(Headers.MESSAGE_ID, msgID);
chris@3
   218
    
chris@3
   219
    return msgID;
chris@3
   220
  }
chris@3
   221
chris@3
   222
  /**
chris@3
   223
   * Returns the body string.
chris@3
   224
   */
chris@3
   225
  public byte[] getBody()
chris@3
   226
  {
chris@3
   227
    return body;
chris@3
   228
  }
chris@3
   229
chris@3
   230
  /**
chris@3
   231
   * @return Charset of the body text
chris@3
   232
   */
chris@3
   233
  private Charset getBodyCharset()
chris@3
   234
  {
chris@3
   235
    // We espect something like 
chris@3
   236
    // Content-Type: text/plain; charset=ISO-8859-15
chris@3
   237
    String contentType = getHeader(Headers.CONTENT_TYPE)[0];
chris@3
   238
    int idxCharsetStart = contentType.indexOf("charset=") + "charset=".length();
chris@3
   239
    int idxCharsetEnd   = contentType.indexOf(";", idxCharsetStart);
chris@3
   240
    
chris@3
   241
    String charsetName = "UTF-8";
chris@3
   242
    if(idxCharsetStart >= 0 && idxCharsetStart < contentType.length())
chris@3
   243
    {
chris@3
   244
      if(idxCharsetEnd < 0)
chris@3
   245
      {
chris@3
   246
        charsetName = contentType.substring(idxCharsetStart);
chris@3
   247
      }
chris@3
   248
      else
chris@3
   249
      {
chris@3
   250
        charsetName = contentType.substring(idxCharsetStart, idxCharsetEnd);
chris@3
   251
      }
chris@3
   252
    }
chris@3
   253
    
chris@3
   254
    // Sometimes there are '"' around the name
chris@3
   255
    if(charsetName.length() > 2 &&
chris@3
   256
      charsetName.charAt(0) == '"' && charsetName.endsWith("\""))
chris@3
   257
    {
chris@3
   258
      charsetName = charsetName.substring(1, charsetName.length() - 2);
chris@3
   259
    }
chris@3
   260
    
chris@3
   261
    // Create charset
chris@3
   262
    Charset charset = Charset.forName("UTF-8"); // This MUST be supported by JVM
chris@3
   263
    try
chris@3
   264
    {
chris@3
   265
      charset = Charset.forName(charsetName);
chris@3
   266
    }
chris@3
   267
    catch(Exception ex)
chris@3
   268
    {
cli@16
   269
      Log.get().severe(ex.getMessage());
cli@16
   270
      Log.get().severe("Article.getBodyCharset(): Unknown charset: " + charsetName);
chris@3
   271
    }
chris@3
   272
    return charset;
chris@3
   273
  }
chris@3
   274
  
chris@3
   275
  /**
chris@3
   276
   * @return Numerical IDs of the newsgroups this Article belongs to.
chris@3
   277
   */
chris@3
   278
  public List<Group> getGroups()
chris@3
   279
  {
chris@3
   280
    String[]         groupnames = getHeader(Headers.NEWSGROUPS)[0].split(",");
chris@3
   281
    ArrayList<Group> groups     = new ArrayList<Group>();
chris@3
   282
chris@3
   283
    try
chris@3
   284
    {
chris@3
   285
      for(String newsgroup : groupnames)
chris@3
   286
      {
chris@3
   287
        newsgroup = newsgroup.trim();
chris@3
   288
        Group group = StorageManager.current().getGroup(newsgroup);
chris@3
   289
        if(group != null &&         // If the server does not provide the group, ignore it
chris@3
   290
          !groups.contains(group))  // Yes, there may be duplicates
chris@3
   291
        {
chris@3
   292
          groups.add(group);
chris@3
   293
        }
chris@3
   294
      }
chris@3
   295
    }
chris@3
   296
    catch(StorageBackendException ex)
chris@3
   297
    {
chris@3
   298
      ex.printStackTrace();
chris@3
   299
      return null;
chris@3
   300
    }
chris@3
   301
    return groups;
chris@3
   302
  }
chris@3
   303
chris@3
   304
  public void setBody(byte[] body)
chris@3
   305
  {
chris@3
   306
    this.body = body;
chris@3
   307
  }
chris@3
   308
  
chris@3
   309
  /**
chris@3
   310
   * 
chris@3
   311
   * @param groupname Name(s) of newsgroups
chris@3
   312
   */
chris@3
   313
  public void setGroup(String groupname)
chris@3
   314
  {
chris@3
   315
    this.headers.setHeader(Headers.NEWSGROUPS, groupname);
chris@3
   316
  }
chris@3
   317
chris@3
   318
  /**
chris@3
   319
   * Returns the Message-ID of this Article. If the appropriate header
chris@3
   320
   * is empty, a new Message-ID is created.
chris@3
   321
   * @return Message-ID of this Article.
chris@3
   322
   */
chris@3
   323
  public String getMessageID()
chris@3
   324
  {
chris@3
   325
    String[] msgID = getHeader(Headers.MESSAGE_ID);
chris@3
   326
    return msgID[0].equals("") ? generateMessageID() : msgID[0];
chris@3
   327
  }
chris@3
   328
  
chris@3
   329
  /**
chris@3
   330
   * @return String containing the Message-ID.
chris@3
   331
   */
chris@3
   332
  @Override
chris@3
   333
  public String toString()
chris@3
   334
  {
chris@3
   335
    return getMessageID();
chris@3
   336
  }
chris@3
   337
chris@3
   338
}