熱點推薦:
您现在的位置: 電腦知識網 >> 編程 >> Java編程 >> Java核心技術 >> 正文

用Lucene做一個簡單的Java搜索工具

2013-11-23 18:52:14  來源: Java核心技術 

  初學Lucene,剛接觸搜索引擎,知道了一點點,想做個小工具,實現根據「單詞」搜索某個java源文件。比如輸入「String」去查詢某些java源文件裡用到了這個類。

  這個想法的來源是:在以前剛學java時,有一本java基礎教程的書,附帶光盤裡有作者寫的一個程序,可以方便初學者查找某些類在哪個實例裡出現。當時沒有太在意,覺得作者的代碼很長,所以現在想自己也寫一個這樣的小程序。

  開發工具與運行環境:使用Lucene的包和JDK,在Windows XP下運行。

  思路分析與設計

  整個程序裡,除了Lucene的必要操作外,就是IO的基本操作了。因為要對某目錄下及其子目錄下的所有Java源文件進行索引,就要用到遞歸,同時要過濾掉非Java源文件。根據這種情況,設計了以下5個類:




 
主類:索引類(IndexJavaFiles)、搜索類(SearchJavaFiles)
異常類:索引異常類(IndexException)、搜索異常類(SearchException)
還有一個文件過濾工廠類(FileFilterFactory)

  異常類不是必要的,特意設計來包裝IO異常、文件異常和Lucene的異常。文件過濾工廠類的出現並不是故弄玄虛,只是不想太多代碼集中在一起,就把文件過濾器的設計放到一個類裡。下面是程序的完整代碼及注釋。


 
IndexJavaFiles.java
/**
 * index the java source files
 */
package powerwind;

import java.io.*;
import java.util.Date;

import org.apache.lucene.document.*;
import org.apache.lucene.index.IndexWriter;
 
/**
 *@authorPowerwind
 *@version
 */
publicclass IndexJavaFiles {
 
    /**
     *默認構造方法
     */
    public IndexJavaFiles() {
    }
 
    /**
     * 這個私有遞歸方法由index方法調用保證index傳入的file是目錄不是文件
     *
     *@paramwriter
     *@paramfile
     *@paramff
     *@throwsIndexException
     */
    privatevoid indexDirectory(IndexWriter writer File file FileFilter filter)throws IndexException {
       if (fileisDirectory()) {
           // 有選擇地(過濾)獲取目錄下的文件和目錄
           File[] files = filelistFiles(filter);
           // 非空目錄
           if (files != null) {
              for (int i = ; i < fileslength; i++) {
                  indexDirectory(writer files[i] filter);
              }
           }
       } else {
           try {
             // 這裡的file經過先前的過濾
              writeraddDocument(parseFile(file));
              Systemoutprintln(增加文件 + file);
           } catch (IOException ioe) {
              thrownew IndexException(ioegetMessage());
           }
       }
    }
 
    /**
     *傳參數是文件就直接索引若是目錄則交給indexDirectory遞歸
     *
     *@paramwriter
     *@paramfile
     *@paramff
     *@throwsIndexException
     */
    publicvoid index(IndexWriter writer File file FileFilter filter) throws IndexException {
       // 確定可讀
       if (fileexists() && filecanRead()) {
           if (fileisDirectory()) {
              indexDirectory(writer file filter);
           } elseif (filteraccept(file)) {
              try {
                  writeraddDocument(parseFile(file));
                  Systemoutprintln(增加文件 + file);
              } catch (IOException ioe) {
                  thrownew IndexException(ioegetMessage());
              }
           } else {
              Systemoutprintln(指定文件或目錄錯誤沒有完成索引);
           }
       }
    }
 
    /**
     *@paramfile
     *
     *把File變成Document
     */
    private Document parseFile(File file) throws IndexException {
       Document doc = new Document();
       docadd(new Field(path filegetAbsolutePath() FieldStoreYES
                     FieldIndexUN_TOKENIZED));
       try {
           docadd(new Field(contents new FileReader(file)));
       } catch (FileNotFoundException fnfe) {
           thrownew IndexException(fnfegetMessage());
       }
       return doc;
    }
}
 
 

index(IndexWriter writer, File file, FileFilter filter)調用私有方法indexDirectory(IndexWriter writer, File file, FileFilter filter)完成文件的索引。
下面是IndexException異常類。
IndexException.java
package powerwind;
 
/**
 * Checked exception raised when indexing files fails. It wraps the I/O, file and Lucene
 * failures met while building the index, and prefixes every message with a fixed
 * "while indexing files" marker.
 */
public class IndexException extends Exception {

    private static final long serialVersionUID = 1L;

    /**
     * Creates an exception whose message is the indexing prefix followed by {@code message}.
     *
     * @param message detail text, typically the message of the underlying failure
     */
    public IndexException(String message) {
        super("Throw IndexException while indexing files: " + message);
    }

    /**
     * Creates an exception that also records the underlying failure.
     *
     * @param message detail text appended to the indexing prefix
     * @param cause   the failure that triggered this exception; may be {@code null}
     */
    public IndexException(String message, Throwable cause) {
        super("Throw IndexException while indexing files: " + message, cause);
    }

}
下面是FileFilterFactory類,返回一個特定的文件過濾器(FileFilter)。
FileFilterFactory.java
package powerwind;

import java.io.*;
 
/**
 * Supplies the {@link FileFilter} used when collecting files for indexing.
 */
public class FileFilterFactory {

    /** File-name suffix a plain file must have to be accepted. */
    private static final String JAVA_SUFFIX = ".java";

    /**
     * Exclusive upper bound, in bytes, on the size of an accepted source file.
     * NOTE(review): the numeric literals were lost from the published listing; 1 MiB is an
     * assumed value - confirm against the original source.
     */
    private static final long MAX_LENGTH = 1024L * 1024L;

    /**
     * Shared filter instance (anonymous inner class held in a static field). It accepts
     * every directory, and every non-empty file below the size bound whose name ends with
     * the Java suffix.
     */
    private static final FileFilter filter = new FileFilter() {
        public boolean accept(File file) {
            // Directories always pass so that a recursive walk can descend into them.
            if (file.isDirectory()) {
                return true;
            }
            if (!file.getName().endsWith(JAVA_SUFFIX)) {
                return false;
            }
            long len = file.length();
            return len > 0 && len < MAX_LENGTH;
        }
    };

    /**
     * Returns the shared file filter.
     *
     * @return the filter; the same instance on every call
     */
    public static FileFilter getFilter() {
        return filter;
    }
}
 
main方法
    /**
     *      main方法
     */
    publicstaticvoid main(String[] args) throws Exception {
       IndexJavaFiles ijf = new IndexJavaFiles();
       Date start = new Date();
       try {
           IndexWriter writer = IndexWriterFactorynewInstance()createWriter(/index true);
           Systemoutprintln(Indexing );
           ijfindex(writer new File() FileFilterFactorygetFilter());
           Systemoutprintln(Optimizing);
           writeroptimize();
           writerclose();
 
           Date end = new Date();
           Systemoutprintln(endgetTime() startgetTime() + total milliseconds);
 
       } catch (IOException e) {
           Systemoutprintln( caught a + egetClass() + \n with message: + egetMessage());
       }
    }
 
 
SearchJavaFiles.java
package powerwind;

import java.io.*;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryParser.*;
import org.apache.lucene.search.*;
 
publicclass SearchJavaFiles {
    private IndexSearcher searcher;
 
    private QueryParser parser;
 
    /**
     *
     *@paramsearcher
     */
    public SearchJavaFiles(IndexSearcher searcher) {
       thissearcher = searcher;
    }
 
    /**
     *
     *@paramfield
     *@paramanalyzer
     */
    publicvoid setParser(String field Analyzer analyzer) {
       setParser(new QueryParser(field analyzer));
    }
 
    /**
     *@paramparser
     */
    publicvoid setParser(QueryParser parser) {
       thisparser = parser;
    }
 
    /**
     *
     *@paramquery
     *@returnHits
     *@throwsSearchException
     */
    public Hits serach(Query query) throws SearchException {
       try {
           returnsearchersearch(query);
       } catch (IOException ioe) {
           thrownew SearchException(ioegetMessage());
       }
    }
 
    /**
     *
     *@paramqueryString
     *@returnHits
     *@throwsSearchException
     */
    public Hits serach(String queryString) throws SearchException {
       if (parser == null)
           thrownew SearchException(parser is null!);
       try {
           returnsearchersearch(parserparse(queryString));
       } catch (IOException ioe) {
           thrownew SearchException(ioegetMessage());
       } catch (ParseException pe) {
           thrownew SearchException(pegetMessage());
       }
    }
 
    /**
     *
     *輸出hits的結果從start開始到end不包括end
     *
     *@paramhits
     *@paramstart
     *@paramend
     *@throwsSearchException
     */
    publicstatic Hits display(Hits hits int start int end) throws SearchException {
       try {
           while (start < end) {
              Document doc = hitsdoc(start);
              String path = docget(path);
              if (path != null) {
                  Systemoutprintln((start + ) + + path);
              } else {
                  Systemoutprintln((start + ) + + No such path);
              }
              start++;
           }
       } catch (IOException ioe) {
           thrownew SearchException(ioegetMessage());
       }
       return hits;
    }
 

main方法
    /**
     *@paramargs
     */
    publicstaticvoid main(String[] args) throws Exception {
 
       String field = contents;
       String index = /index;
       finalint rows_per_page = ;
       finalchar NO = n;
 
       SearchJavaFiles sjf = new SearchJavaFiles(new IndexSearcher(IndexReaderopen(index)));
       sjfsetParser(field new StandardAnalyzer());
       BufferedReader in = new BufferedReader(new InputStreamReader(Systemin UTF));
 
       while (true) {
           Systemoutprintln(Query: );
           String line = inreadLine();
           if (line == null || linelength() < ) {
              Systemoutprintln(eixt query);
              break;
           }
           Hits hits = sjfserach(line);
           Systemoutprintln(searching for + line + Result is );
 
           int len = hitslength();
           int i = ;
           if (len > )
              while (true) {
                  if (i + rows_per_page >= len) {
                     SearchJavaFilesdisplay(hits i len);
                     break;
                  } else {
                     SearchJavaFilesdisplay(hits i i += rows_per_page);
                     Systemoutprintln(more y/n?);
                     line = inreadLine();
                     if (linelength() < || linecharAt() == NO)
                         break;
                  }
              }
           else
              Systemoutprintln(not found);
       }
    }
}
 
SearchException.java
package powerwind;
 
/**
 * Checked exception raised when searching the index fails. It wraps the I/O and
 * query-parsing failures met while searching, and prefixes every message with a fixed
 * "while searching files" marker.
 */
public class SearchException extends Exception {

    private static final long serialVersionUID = 1L;

    /**
     * Creates an exception whose message is the searching prefix followed by {@code message}.
     *
     * @param message detail text, typically the message of the underlying failure
     */
    public SearchException(String message) {
        super("Throw SearchException while searching files: " + message);
    }

    /**
     * Creates an exception that also records the underlying failure.
     *
     * @param message detail text appended to the searching prefix
     * @param cause   the failure that triggered this exception; may be {@code null}
     */
    public SearchException(String message, Throwable cause) {
        super("Throw SearchException while searching files: " + message, cause);
    }

}

 


 
完善設想
文件格式:
能夠處理Zip文件、Jar文件,索引裡面的java源文件。
通過反射機制索引class類文件。
輸入輸出:
除控制台輸入輸出外,還可以選擇從文件讀取查詢關鍵字,輸出查詢結果到文件。
用戶界面:
圖形界面操作,雙擊查詢結果的某條記錄可以打開相應文件。
性能方面:
索引文件時用緩存和多線程處理。


From:http://tw.wingwit.com/Article/program/Java/hx/201311/25898.html
    推薦文章
    Copyright © 2005-2013 電腦知識網 Computer Knowledge   All rights reserved.