// MboxParser.java - read in a file with a series of MIME email messages, break out messages.
// By Rowland http://home.comcast.net/~rowland3/

import java.io.*;
import java.util.*;

/**
 * Small helper for concatenating a sequence of items with a separator.
 */
class Joiner {

    /**
     * Joins the string forms of {@code lines}, placing {@code inbetween}
     * between consecutive elements (not after the last one).
     *
     * @param lines     items to join; each is converted as by
     *                  {@code String.valueOf}, so non-String elements and
     *                  {@code null} elements are accepted
     * @param inbetween separator inserted between consecutive items
     * @return the joined text, or the empty string for an empty array
     */
    public static String join(Object[] lines, String inbetween) {
        StringBuffer sb = new StringBuffer("");
        for (int i = 0; i < lines.length; i++) {
            if (i > 0) {
                sb.append(inbetween);
            }
            // append(Object) instead of a (String) cast: the signature
            // promises Object[], so arbitrary elements must not blow up.
            sb.append(lines[i]);
        }
        return sb.toString();
    }

    /** Joins {@code lines} with a newline between consecutive elements. */
    public static final String join(String[] lines) {
        return join(lines, "\n");
    }

    /** Joins the elements of {@code vlines} with {@code inbetween} between them. */
    public static final String join(Vector vlines, String inbetween) {
        return join(vlines.toArray(), inbetween);
    }

    /** Joins the elements of {@code vlines} with a newline between them. */
    public static final String join(Vector vlines) {
        return join(vlines.toArray(), "\n");
    }
}

//------------------------------------------------------------------------------

/**
 * Line-oriented parser that splits a stream of concatenated email messages
 * into individual messages.
 *
 * <p>A message starts at a line beginning with {@code "From "} whose last
 * whitespace-separated token starts with a digit {@code 0}-{@code 2} (taken
 * to be the year). Such a line is only considered at the very start of the
 * input or directly after an empty line. For every message,
 * {@link #doMessageCallback} is invoked; subclasses override it to consume
 * the messages.
 *
 * <p>Only the {@code "Subject: "} and {@code "Message-ID: "} headers are
 * extracted (exact, case-sensitive prefix match). All other header lines are
 * kept and end up at the beginning of the text passed as {@code body}.
 *
 * <p>Both constructors parse the whole input before returning, so
 * {@link #doMessageCallback} runs while the object is still being
 * constructed: subclass instance fields are not yet initialized at that time.
 */
public class MboxParser {
    protected LineNumberReader in;

    /** True when the next line may be a "From " separator (start of input or after an empty line). */
    protected boolean expectingStart;

    /** True once the empty line that ends the header section has been seen. */
    protected boolean inMessageBody = false;

    // Components of the message being harvested:
    protected String from = null;
    protected String year = null;
    protected String subject = null;
    protected String messageid = null;
    protected Vector messageBody = null;

    /**
     * Opens the named file (decoded with the platform default charset) and
     * parses it completely.
     *
     * @param fileSpec path of the file to read
     * @throws IOException if the file does not exist or cannot be opened
     */
    public MboxParser(String fileSpec) throws Exception {
        File file = new File(fileSpec);
        if (!file.exists()) {
            throw new IOException("No such file: " + fileSpec);
        }
        Reader reader = new InputStreamReader(new FileInputStream(fileSpec));
        in = new LineNumberReader(reader);
        run();
    }

    /**
     * Parses everything available from {@code reader}; the reader is closed
     * when parsing ends.
     *
     * @param reader source of the message text
     */
    public MboxParser(InputStreamReader reader) throws Exception {
        in = new LineNumberReader(reader);
        run();
    }

    /**
     * Reads the input line by line, dispatching each line to
     * {@link #doLine}, then flushes the last pending message.
     *
     * <p>Errors are reported on {@code System.err} and stop the parse; they
     * are not propagated. The reader is closed in all cases.
     */
    protected void run() {
        String line = null;
        try {
            expectingStart = true;
            while ((line = in.readLine()) != null) {
                doLine(line);
            }
            // Flush whatever has been collected. Testing messageBody rather
            // than inMessageBody avoids joining a null body (input made of
            // empty lines only) and still reports a final message that was
            // cut off before its header section ended.
            if (messageBody != null) {
                doMessageEnd();
            }
        } catch (Exception e) {
            System.err.println("ERR MboxParser run: " + e.toString());
            System.err.println(" line: " + line);
            e.printStackTrace();
        } finally {
            // Close here so the reader is released even when parsing failed.
            try {
                in.close();
            } catch (IOException e) {
                System.err.println("ERR MboxParser close: " + e.toString());
            }
        }
    }

    /**
     * Classifies one input line and updates the parser state.
     *
     * @param line the line, without its line terminator
     */
    protected void doLine(String line) {
        if (line.length() == 0) {
            expectingStart = true;
            if (inMessageBody) {
                doMessageBodyLine(line);
            } else {
                inMessageBody = true; // end of header
            }
        } else if (expectingStart && line.startsWith("From ")) {
            doFromLine(line);
        } else if (!inMessageBody && line.startsWith("Subject: ")) {
            expectingStart = false;
            subject = line.substring(9);
        } else if (!inMessageBody && line.startsWith("Message-ID: ")) {
            expectingStart = false;
            messageid = line.substring(12);
        } else {
            // Any other line, including unrecognized headers, is kept.
            expectingStart = false;
            doMessageBodyLine(line);
        }
    }

    /**
     * Handles a candidate "From " separator line.
     *
     * <p>The line is validated before the previous message is ended, so a
     * body line that merely starts with "From " stays part of the message it
     * belongs to instead of splitting it.
     *
     * @param line the candidate separator line
     */
    protected final void doFromLine(String line) {
        StringTokenizer st = new StringTokenizer(line);
        String sender = null; // second token
        String last = null;   // final token, expected to be the year
        int count = 0;
        while (st.hasMoreTokens()) {
            last = st.nextToken();
            if (count == 1) {
                sender = last;
            }
            count++;
        }

        boolean plausible = false;
        if (count >= 2) {
            char c = last.charAt(0);
            plausible = (c >= '0' && c <= '2');
        }
        if (!plausible) {
            if (inMessageBody) {
                doMessageBodyLine(line); // false positive
            }
            return;
        }

        if (messageBody != null) {
            doMessageEnd(); // process previous message
        }

        from = sender;
        year = last;
        messageBody = new Vector();
        inMessageBody = false;
        expectingStart = false;
    }

    /**
     * Appends a line to the message being collected, creating the
     * collection on first use.
     *
     * @param line the line to keep
     */
    protected final void doMessageBodyLine(String line) {
        if (messageBody == null) {
            messageBody = new Vector();
        }
        messageBody.add(line);
    }

    /**
     * Finishes the current message: joins the collected lines with newlines,
     * invokes {@link #doMessageCallback}, and clears the per-message state.
     * Requires {@code messageBody} to be non-null.
     */
    protected final void doMessageEnd() {
        String body = Joiner.join(messageBody, "\n");
        if (messageid == null) {
            messageid = from + year + subject; // better than nothing
        }
        doMessageCallback(messageid, from, year, subject, body);
        messageBody = null;
        inMessageBody = false;
        messageid = null;
        // Reset so a following message without a Subject header does not
        // inherit this one; from/year are overwritten by the next From line.
        subject = null;
    }

    /**
     * Called once per message; override to process messages. The default
     * prints a one-line summary to {@code System.out}.
     *
     * @param messageid the Message-ID header value, or
     *                  {@code from + year + subject} when none was present
     * @param from      second token of the "From " separator line
     * @param year      last token of the "From " separator line
     * @param subject   the Subject header value, or {@code null} if absent
     * @param body      the collected lines joined with newlines
     */
    protected void doMessageCallback(String messageid, String from, String year,
                                     String subject, String body) {
        System.out.println("From: " + from + " year: " + year + " " + subject);
    }
}