package org.wdssii.webindex.servlet;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.FilenameFilter;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashMap;
import java.util.List;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.wdssii.core.fam.FamIndexHelper;
import org.wdssii.core.fam.FamIndexHelperLsImpl;

/**
 * Data-access object that keeps an in-memory {@link IndexRecordList} for one
 * source, built from the files reported by a {@link FamIndexHelper}.
 *
 * <p>Each file is read whole into an {@link IndexRecordBean}. Files that fail
 * to read are queued and retried on later updates, up to a fixed number of
 * attempts. When the list grows past 150% of the configured target size it is
 * pruned back to the target.
 *
 * <p>The only synchronized entry point is {@link #getCurrentRecordList(long)};
 * the setters are not synchronized.
 */
public class WebIndexDAO {
    private String source;
    private String indexDir;
    private long targetNumberOfRecords;
    /** Null until the first request after construction or after {@link #setSource}. */
    private IndexRecordList currentRecordList = null;
    /** Counter used to hand a distinct id to every record that is read. */
    private int recordsRead = 0;
    private HashMap<String, IndexRecordBean> filename_to_record = new HashMap<String, IndexRecordBean>();
    private HashMap<IndexRecordBean, String> record_to_filename = new HashMap<IndexRecordBean, String>();
    private final Log log = LogFactory.getLog(WebIndexDAO.class);

    private FamIndexHelper famIndexHelper = new FamIndexHelperLsImpl();

    /** A file waiting to be read, together with its remaining read attempts. */
    private static class FileToRead implements Comparable<FileToRead> {
        private File file;
        private int numAttemptsLeft = 3;

        private FileToRead(File f) {
            this.file = f;
        }

        /** Orders entries by their underlying {@link File}. */
        @Override
        public int compareTo(FileToRead o) {
            return this.file.compareTo(o.file);
        }

        /** Wraps every element of {@code files} in a fresh {@code FileToRead}. */
        public static FileToRead[] createList(File[] files) {
            FileToRead[] result = new FileToRead[files.length];
            for (int i = 0; i < files.length; ++i) {
                result[i] = new FileToRead(files[i]);
            }
            return result;
        }
    }

    /** Files whose last read failed and that still have attempts left. */
    private List<FileToRead> filesToRead = new ArrayList<FileToRead>();

    public String getSource() {
        return source;
    }

    /**
     * Sets the source name and discards the current record list so that it is
     * rebuilt on the next request.
     */
    public void setSource(String source) {
        this.source = source;
        this.currentRecordList = null; // so that it is init-ed the next time
    }

    public String getIndexDir() {
        return indexDir;
    }

    public void setIndexDir(String indexDir) {
        this.indexDir = indexDir;
    }

    public long getTargetNumberOfRecords() {
        return targetNumberOfRecords;
    }

    public void setTargetNumberOfRecords(long n) {
        this.targetNumberOfRecords = n;
    }

    public void setFamIndexHelper(FamIndexHelper helper) {
        this.famIndexHelper = helper;
    }

    public FamIndexHelper getFamIndexHelper() {
        return famIndexHelper;
    }

    /**
     * Builds a fresh record list for the current source from the helper's
     * initial files.
     */
    protected void init() {
        // A re-init (e.g. after setSource) must not carry over records or
        // pending retries collected for the previous source.
        filename_to_record.clear();
        record_to_filename.clear();
        filesToRead.clear();

        currentRecordList = IndexRecordList.newInstance(this.source);
        File[] files = famIndexHelper.getInitialFiles(indexDir);
        update(FileToRead.createList(files));
    }

    /**
     * Reads the whole file into a new record.
     *
     * <p>Lines are concatenated without line separators.
     *
     * @return the new record, or {@code null} if the file could not be read
     */
    private IndexRecordBean extractContent(FileToRead fileToRead) {
        String filename = fileToRead.file.getAbsolutePath();
        BufferedReader reader = null;
        try {
            if (log.isDebugEnabled()) {
                log.debug("Reading " + filename);
            }
            reader = new BufferedReader(new FileReader(filename));
            StringBuilder content = new StringBuilder();
            String line;
            while ((line = reader.readLine()) != null) {
                content.append(line);
            }
            return new IndexRecordBean(filename, content.toString(), ++recordsRead);
        } catch (IOException e) {
            // The caller decrements numAttemptsLeft after this failure, so the
            // number of retries still to come is one less than the current value.
            int retriesLeft = fileToRead.numAttemptsLeft - 1;
            log.warn("Problem reading " + filename + ": " + e + " will try again " + retriesLeft + " times");
        } finally {
            try {
                if (reader != null) {
                    reader.close();
                }
            } catch (IOException ignored) {
                // Best effort: nothing useful can be done if close fails.
            }
        }
        return null;
    }

    /**
     * Reads the given files in sorted order, records the successful ones,
     * queues the failed ones for retry while they have attempts left, then
     * refreshes the record list and prunes it if needed.
     */
    private void update(FileToRead[] newfiles) {
        // sort the files by their File ordering
        Arrays.sort(newfiles);

        // read all the new files and add to map
        for (FileToRead f : newfiles) {
            IndexRecordBean rec = extractContent(f);
            if (rec != null) {
                addRecordToMaps(f.file.getAbsolutePath(), rec);
            } else {
                f.numAttemptsLeft--;
                if (f.numAttemptsLeft > 0) {
                    filesToRead.add(f);
                }
            }
        }

        // change the current TO to reflect new records
        Collection<IndexRecordBean> records = filename_to_record.values();
        currentRecordList.setRecords(records);

        // if 50% more than target, start to prune
        prune();
    }

    /** Invoked on every user request for currentRecordList. */
    private final void update() {
        if (currentRecordList == null) {
            init();
            return;
        }

        File[] newfiles = famIndexHelper.getNewFiles();
        for (int i = 0; i < newfiles.length; ++i) {
            filesToRead.add(new FileToRead(newfiles[i]));
        }
        if (filesToRead.size() > 0) {
            // Snapshot and clear first: update(...) re-queues failed files
            // into filesToRead.
            FileToRead[] toread = filesToRead.toArray(new FileToRead[0]);
            filesToRead.clear();
            update(toread);
        }
    }

    /** Registers the record in both lookup maps; a null record is ignored. */
    private void addRecordToMaps(String filename, IndexRecordBean rec) {
        if (rec != null) {
            filename_to_record.put(filename, rec);
            record_to_filename.put(rec, filename);
        }
    }

    /**
     * Prunes the record list and the helper back to the target size once the
     * list holds more than 150% of the target, then rebuilds both lookup maps
     * from the records that remain.
     */
    private void prune() {
        long maxNumberOfRecords = targetNumberOfRecords + targetNumberOfRecords / 2;
        if (currentRecordList.getRecords().length > maxNumberOfRecords) {
            if (log.isInfoEnabled()) {
                log.info("Pruning " + source + "'s index to " + targetNumberOfRecords + " from " + currentRecordList.getRecords().length);
            }
            currentRecordList.pruneToMaxSize(targetNumberOfRecords);
            famIndexHelper.pruneToMaxSize(maxNumberOfRecords, targetNumberOfRecords);

            // clear out the old maps
            HashMap<IndexRecordBean, String> record_to_filename_old = new HashMap<IndexRecordBean, String>(record_to_filename);
            record_to_filename.clear();
            filename_to_record.clear();

            // repopulate the maps based on the existing records
            IndexRecordBean[] records = currentRecordList.getRecords();
            for (IndexRecordBean record : records) {
                String filename = record_to_filename_old.get(record);
                addRecordToMaps(filename, record);
            }
        }
    }

    /**
     * Brings the index up to date and returns a copy of it.
     *
     * @return a TransferObject that clients own. Changes to the TO will not
     * affect this business object in any way.
     * See the constants in IndexRecordList for special values that lastRead can take.
     */
    public synchronized IndexRecordList getCurrentRecordList(long lastRead) {
        this.update();
        return currentRecordList.newInstance(lastRead);
    }

}