在路上

 找回密码
 立即注册
在路上 站点首页 学习 查看内容

java+lucene中文分词,搜索引擎搜词剖析

2016-8-16 12:48| 发布者: zhangjf| 查看: 629| 评论: 0

摘要: 我想只要是学过数据库的孩子,不管是 MySQL 还是 SQL Server,一提到查找,本能想到的便是 like 关键字。其实去转盘网(分类模式)之前也是采用这种算法,但我可以告诉大家一个很不幸的事情:like 匹配其实会浪费大量的 ...

我想只要是学过数据库的孩子,不管是 MySQL 还是 SQL Server,一提到查找,本能想到的便是 like 关键字。其实去转盘网(分类模式)之前也是采用这种算法,但我可以告诉大家一个很不幸的事情:like 匹配其实会浪费大量的有用资源。原因这里不说了,请自己想一想,我们还是直接摆事实验证。

现在用去转盘网搜索 hello 这个单词,如下:

http://www.quzhuanpan.com/source/search.action?q=hello&currentPage=1

翻页你会发现只要是包含hello的单词都找到了,但是如果你用like的话是不会有这个效果的,不信让我们再看一下,还好他来说电影网的分词算法我还没来得及修改,还可以看到现象:

http://www.talaishuo.com/searchResult.do?searchFileName=hello

你会发现只有以 hello 开头的字段才能得到匹配,这样问题就来了:数据库中大量的资源岂不是白白浪费了?不过没事,伟大的人类还是很聪明的,发明了分词。分词的原理我就不讲了,请自己百度吧,还是直接上代码。提示:这里需要四个 jar 包作为工具,我已经先上传到去转盘网,想要做分词的请先下载:

分词包下载地址1:http://www.quzhuanpan.com/home/sourceList.jsp?type=6

分词包下载地址2:http://www.quzhuanpan.com/download/checkResult.action?id=34&type=6

直接看代码:

package com.tray.indexData;
import java.io.File;
import java.io.IOException;
import java.io.StringReader;
import java.math.BigInteger;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Fieldable;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.PrefixQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.TopScoreDocCollector;
import org.apache.lucene.search.WildcardQuery;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import org.wltea.analyzer.lucene.IKAnalyzer;

import com.tray.bean.SerachResult;
import com.tray.common.tools.DateFormater;

public class LuceneSearch {

  /**
   * Filesystem location of the Lucene index. Starts as the Linux path and is
   * switched to a local drive path when the JVM reports a Windows OS.
   */
  private static String DISC_URL = "/home/indexData/data";

  static {
      String os = System.getProperty("os.name");
      if (os.toLowerCase().startsWith("win")) {
          // Backslashes must be escaped: "\i" and "\d" are not legal Java escape
          // sequences, so the unescaped literal does not compile.
          DISC_URL = "E:\\indexData\\data";
      } else {
          DISC_URL = "/home/indexData/data";
      }
  }

  /** Analyzer used for indexing and for parsing queries (IKAnalyzer word segmentation). */
  private Analyzer analyzer = new IKAnalyzer();

  /** Index directory handle, opened in init(). */
  private static Directory directory;

  /** Writer configuration, built in init(). */
  private static IndexWriterConfig iwConfig;

  /** Shared index writer, created in init(). */
  private static IndexWriter writer;

  /** The index directory as a java.io.File, set in init(). */
  private static File indexFile = null;

  /** Lucene compatibility version passed to the writer config and query parsers. */
  private static Version version = Version.LUCENE_36;

  /** Page size used by highLightSearch when the requested size is out of range. */
  private final int PAPGESIZE = 10;
  21. /**
复制代码

全量索引

@Author haoning
*/

void init() throws Exception {

  1. try {
  2. indexFile = new File(DISC_URL);
  3. if (!indexFile.exists()) {
  4. indexFile.mkdir();
  5. }
  6. directory=FSDirectory.open(indexFile);
  7. //配置IndexWriterConfig
  8. iwConfig = new IndexWriterConfig(version,analyzer);
  9. iwConfig.setOpenMode(OpenMode.CREATE_OR_APPEND);
  10. //创建写索引对象
  11. writer = new IndexWriter(directory,iwConfig);
  12. } catch (Exception e) {
  13. }
复制代码

}

void closeWriter(){

  1. try {
  2. writer.close();
  3. } catch (CorruptIndexException e) {
  4. e.printStackTrace();
  5. } catch (IOException e) {
  6. e.printStackTrace();
  7. }
复制代码

}

void commit(){

  1. try {
  2. writer.commit();
  3. } catch (CorruptIndexException e) {
  4. e.printStackTrace();
  5. } catch (IOException e) {
  6. e.printStackTrace();
  7. }
复制代码

}

/**

一个一个索引

@Author haoning
*/

void singleIndex(Document doc) throws Exception {

  1. writer.addDocument(doc);
复制代码

}

/**

一个跟新

@Author haoning
*/

void singleUpdate(Document doc) throws Exception {

  1. Term term = new Term("url", doc.get("url"));
  2. writer.updateDocument(term,doc);
复制代码

}

/**

全量索引

@Author haoning
*/

void fullIndex(Document[] documentes) throws Exception {

  1. writer.deleteAll();
  2. for (Document document : documentes) {
  3. writer.addDocument(document);
  4. }
  5. writer.commit();
复制代码

}

/**

根据id删除索引

@Author haoning
*/

void deleteIndex(Document document)throws Exception{

  1. Term term = new Term("url", document.get("url"));//url才是唯一标志
  2. writer.deleteDocuments(term);
  3. writer.commit();
复制代码

}

/**

根据id增量索引

@Author haoning
*/

void updateIndex(Document[] documentes) throws Exception{

  1. for (Document document : documentes) {
  2. Term term = new Term("url", document.get("url"));
  3. writer.updateDocument(term, document);
  4. }
  5. writer.commit();
复制代码

}

/**

直接查询

@Author haoning
*/

void simpleSearch(String filedStr,String queryStr,int page, int pageSize) throws Exception{

  1. File indexDir = new File(DISC_URL);
  2. //索引目录
  3. Directory dir=FSDirectory.open(indexDir);
  4. //根据索引目录创建读索引对象
  5. IndexReader reader = IndexReader.open(dir);
  6. //搜索对象创建
  7. IndexSearcher searcher = new IndexSearcher(reader);
  8. TopScoreDocCollector topCollector = TopScoreDocCollector.create(searcher.maxDoc(), false);
  9. Term term = new Term(filedStr, queryStr);
  10. Query query = new TermQuery(term);
  11. searcher.search(query, topCollector);
  12. ScoreDoc[] docs = topCollector.topDocs((page-1)*pageSize, pageSize).scoreDocs;
  13. printScoreDoc(docs, searcher);
复制代码

}

/**

高亮查询

@Author haoning
*/

Map highLightSearch(String filed,String keyWord,int curpage, int pageSize) throws Exception{

  1. List<SerachResult> list=new ArrayList<SerachResult>();
  2. Map<String,Object> map = new HashMap<String,Object>();
  3. if (curpage <= 0) {
  4. curpage = 1;
  5. }
  6. if (pageSize <= 0 || pageSize>20) {
  7. pageSize = PAPGESIZE;
  8. }
  9. File indexDir = new File(DISC_URL); //索引目录
  10. Directory dir=FSDirectory.open(indexDir);//根据索引目录创建读索引对象
  11. IndexReader reader = IndexReader.open(dir);//搜索对象创建
  12. IndexSearcher searcher = new IndexSearcher(reader);
  13. int start = (curpage - 1) * pageSize;
  14. Analyzer analyzer = new IKAnalyzer(true);
  15. QueryParser queryParser = new QueryParser(Version.LUCENE_36, filed, analyzer);
  16. queryParser.setDefaultOperator(QueryParser.AND_OPERATOR);
  17. Query query = queryParser.parse(keyWord);
  18. int hm = start + pageSize;
  19. TopScoreDocCollector res = TopScoreDocCollector.create(hm, false);
  20. searcher.search(query, res);
  21. SimpleHTMLFormatter simpleHTMLFormatter = new SimpleHTMLFormatter("<span style='color:red'>", "</span>");
  22. Highlighter highlighter = new Highlighter(simpleHTMLFormatter, new QueryScorer(query));
  23. long amount = res.getTotalHits();
  24. //long pages = (rowCount - 1) / pageSize + 1; //计算总页数
  25. map.put("amount",amount);//总共多少条记录
  26. TopDocs tds = res.topDocs(start, pageSize);
  27. ScoreDoc[] sd = tds.scoreDocs;
  28. for (int i = 0; i < sd.length; i++) {
  29. Document doc = searcher.doc(sd[i].doc);
  30. String temp=doc.get("name");
  31. //做高亮处理
  32. TokenStream ts = analyzer.tokenStream("name", new StringReader(temp));
  33. SerachResult record=new SerachResult();
  34. String name = highlighter.getBestFragment(ts,temp);
  35. String skydirverName=doc.get("skydirverName");
  36. String username=doc.get("username");
  37. String shareTime=doc.get("shareTime");
  38. String describ=doc.get("describ");
  39. String typeId=doc.get("typeId");
  40. String id=doc.get("id");
  41. String url=doc.get("url");
  42. record.setName(name);
  43. record.setSkydriverName(skydirverName);
  44. record.setUsername(username);
  45. record.setShareTime(DateFormater.getFormatDate(shareTime,"yyyy-MM-dd HH:mm:ss"));
  46. record.setDescrib(describ);
  47. record.setTypeId(Integer.parseInt(typeId));
  48. record.setId(new BigInteger(id));
  49. record.setUrl(url);
  50. list.add(record);
  51. /*System.out.println("name:"+name);
  52. System.out.println("skydirverName:"+skydirverName);
  53. System.out.println("username:"+username);
  54. System.out.println("shareTime:"+shareTime);
  55. System.out.println("describ:"+describ);
  56. System.out.println("typeId:"+typeId);
  57. System.out.println("id:"+id);
  58. System.out.println("url:"+url);*/
  59. }
  60. map.put("source",list);
  61. return map;
复制代码

}

/**

根据前缀查询

@Author haoning
*/

void prefixSearch(String filedStr,String queryStr) throws Exception{

  1. File indexDir = new File(DISC_URL);
  2. //索引目录
  3. Directory dir=FSDirectory.open(indexDir);
  4. //根据索引目录创建读索引对象
  5. IndexReader reader = IndexReader.open(dir);
  6. //搜索对象创建
  7. IndexSearcher searcher = new IndexSearcher(reader);
  8. Term term = new Term(filedStr, queryStr);
  9. Query query = new PrefixQuery(term);
  10. ScoreDoc[] docs = searcher.search(query, 3).scoreDocs;
  11. printScoreDoc(docs, searcher);
复制代码

}

/**

通配符查询

@Author haoning
*/

void wildcardSearch(String filedStr,String queryStr) throws Exception{

  1. File indexDir = new File(DISC_URL);
  2. //索引目录
  3. Directory dir=FSDirectory.open(indexDir);
  4. //根据索引目录创建读索引对象
  5. IndexReader reader = IndexReader.open(dir);
  6. //搜索对象创建
  7. IndexSearcher searcher = new IndexSearcher(reader);
  8. Term term = new Term(filedStr, queryStr);
  9. Query query = new WildcardQuery(term);
  10. ScoreDoc[] docs = searcher.search(query, 3).scoreDocs;
  11. printScoreDoc(docs, searcher);
复制代码

}

/**

分词查询

@Author haoning
*/

void analyzerSearch(String filedStr,String queryStr) throws Exception{

  1. File indexDir = new File(DISC_URL);
  2. //索引目录
  3. Directory dir=FSDirectory.open(indexDir);
  4. //根据索引目录创建读索引对象
  5. IndexReader reader = IndexReader.open(dir);
  6. //搜索对象创建
  7. IndexSearcher searcher = new IndexSearcher(reader);
  8. QueryParser queryParser = new QueryParser(version, filedStr, analyzer);
  9. Query query = queryParser.parse(queryStr);
  10. ScoreDoc[] docs = searcher.search(query, 3).scoreDocs;
  11. printScoreDoc(docs, searcher);
复制代码

}

/**

多属性分词查询

@Author haoning
*/

void multiAnalyzerSearch(String[] filedStr,String queryStr) throws Exception{

  1. File indexDir = new File(DISC_URL);
  2. //索引目录
  3. Directory dir=FSDirectory.open(indexDir);
  4. //根据索引目录创建读索引对象
  5. IndexReader reader = IndexReader.open(dir);
  6. //搜索对象创建
  7. IndexSearcher searcher = new IndexSearcher(reader);
  8. QueryParser queryParser = new MultiFieldQueryParser(version, filedStr, analyzer);
  9. Query query = queryParser.parse(queryStr);
  10. ScoreDoc[] docs = searcher.search(query, 3).scoreDocs;
  11. printScoreDoc(docs, searcher);
复制代码

}

void printScoreDoc(ScoreDoc[] docs,IndexSearcher searcher)throws Exception{

  1. for (int i = 0; i < docs.length; i++) {
  2. List<Fieldable> list = searcher.doc(docs[i].doc).getFields();
  3. for (Fieldable fieldable : list) {
  4. String fieldName = fieldable.name();
  5. String fieldValue = fieldable.stringValue();
  6. System.out.println(fieldName+" : "+fieldValue);
  7. }
  8. }
复制代码

}
}

注意:由于去转盘网(http://www.quzhuanpan.com)是部署在 Linux 上的,所以 DISC_URL 会根据操作系统切换。我是通过 url 来判定索引文档是否唯一的,你也可以根据 id 来判断,具体情况具体对待吧。
package com.tray.indexData;

import java.sql.SQLException;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import com.mysql.jdbc.Connection;
import com.mysql.jdbc.ResultSet;
import com.mysql.jdbc.Statement;

public class IndexFile {

  1. private static Connection conn = null;
  2. private static Statement stmt = null;
  3. private final int NUM=500000;
  4. private LuceneSearch ls;
  5. private long count=0;
  6. public ResultSet deal6SourceTable(String tableName) throws SQLException{
  7. String sql = "SELECT distinct `NAME`,SKYDRIVER_NAME,USERNAME,SHARE_TIME,DESCRIB,TYPE_ID,ID,URL FROM "+tableName+" where STATUS=1 and TYPE_ID !='-1' and (TYPE_NAME is null or TYPE_NAME!=1) limit "+NUM;
  8. //System.out.println(sql);
  9. ResultSet rs = (ResultSet) stmt.executeQuery(sql);
  10. return rs;
  11. }
  12. public void update6SourceTable(String tableName) throws SQLException{
  13. Statement st = (Statement) conn.createStatement();
  14. String sql = "update "+tableName+" set TYPE_NAME=1 where STATUS=1 and TYPE_ID !='-1' and (TYPE_NAME is null or TYPE_NAME!=1) limit "+NUM;
  15. //System.out.println("update"+sql);
  16. try {
  17. st.executeUpdate(sql);
  18. } catch (SQLException e) {
  19. e.printStackTrace();
  20. }
  21. }
  22. public void indexInit(){//数据库+lcene初始化
  23. conn = (Connection) JdbcUtil.getConnection();
  24. if(conn == null) {
  25. try {
  26. throw new Exception("数据库连接失败!");
  27. } catch (Exception e) {
  28. e.printStackTrace();
  29. }
  30. }
  31. ls=new LuceneSearch();
  32. try {
  33. ls.init();
  34. } catch (Exception e2) {
  35. e2.printStackTrace();
  36. }
  37. }
  38. public void indexEnd(){//数据库+lcene关闭
  39. ls.closeWriter();
  40. try {
  41. conn.close();//关闭数据库
  42. } catch (SQLException e) {
  43. e.printStackTrace();
  44. }
  45. }
  46. public void Index6Data() throws SQLException{
  47. try {
  48. stmt = (Statement) conn.createStatement();
  49. } catch (SQLException e1) {
  50. e1.printStackTrace();
  51. }
  52. ResultSet r1=null;
  53. ResultSet r2=null;
  54. ResultSet r3=null;
  55. ResultSet r4=null;
  56. ResultSet r5=null;
  57. ResultSet r6=null;
  58. boolean stop=false;
  59. do{
  60. r1=deal6SourceTable("film_and_tv_info");
  61. stop=this.createIndex(r1,ls,"1"); //给数据库创建索引,此处执行一次,不要每次运行都创建索引,以后数据有更新可以后台调用更新索引
  62. if(!stop){
  63. ls.commit();//加个判断条件
  64. }
  65. //System.out.println("stop"+stop);
  66. }while(!stop);
  67. stop=false;
  68. do{
  69. r2=deal6SourceTable("music_and_mv_info");
  70. stop=this.createIndex(r2,ls,"2"); //给数据库创建索引,此处执行一次,不要每次运行都创建索引,以后数据有更新可以后台调用更新索引
  71. if(!stop){
  72. ls.commit();//加个判断条件
  73. }
  74. }while(!stop);
  75. stop=false;
  76. do{
  77. r3=deal6SourceTable("e_book_info");
  78. stop=this.createIndex(r3,ls,"3"); //给数据库创建索引,此处执行一次,不要每次运行都创建索引,以后数据有更新可以后台调用更新索引
  79. if(!stop){
  80. ls.commit();//加个判断条件
  81. }
  82. }while(!stop);
  83. stop=false;
  84. do{
  85. r4=deal6SourceTable("bt_file_info");
  86. stop=this.createIndex(r4,ls,"4"); //给数据库创建索引,此处执行一次,不要每次运行都创建索引,以后数据有更新可以后台调用更新索引
  87. if(!stop){
  88. ls.commit();//加个判断条件
  89. }
  90. }while(!stop);
  91. stop=false;
  92. do{
  93. r5=deal6SourceTable("characteristic_software_info");
  94. stop=this.createIndex(r5,ls,"5"); //给数据库创建索引,此处执行一次,不要每次运行都创建索引,以后数据有更新可以后台调用更新索引
  95. if(!stop){
  96. ls.commit();//加个判断条件
  97. }
  98. }while(!stop);
  99. stop=false;
  100. do{
  101. r6=deal6SourceTable("source_code_info");
  102. stop=this.createIndex(r6,ls,"6"); //给数据库创建索引,此处执行一次,不要每次运行都创建索引,以后数据有更新可以后台调用更新索引
  103. if(!stop){
  104. ls.commit();//加个判断条件
  105. }
  106. }while(!stop);
  107. stop=false;
  108. }
  109. public ResultSet deal2Share(String tableName) throws SQLException{
  110. String sql = "SELECT distinct NAME,SKYDRIVER_NAME,USERNAME,SHARE_TIME,DESCRIB,TYPE_ID,ID,SHORTURL from "+tableName+" where STATUS=1 and FS_ID ='1' limit "+NUM; //利用FS_ID这个字段,没什么用处
  111. ResultSet rs = (ResultSet) stmt.executeQuery(sql);
  112. return rs;
  113. }
  114. public ResultSet deal3Share(String tableName) throws SQLException{
  115. String sql = "SELECT distinct title,channel,uid,ctime,description,port,id,shorturl from "+tableName+" where name ='1' limit "+NUM;
  116. ResultSet rs = (ResultSet) stmt.executeQuery(sql);
  117. return rs;
  118. }
  119. public void Index3Data() throws SQLException{
  120. try {
  121. stmt = (Statement) conn.createStatement();
  122. } catch (SQLException e1) {
  123. e1.printStackTrace();
  124. }
  125. ResultSet r1=null;
  126. ResultSet r2=null;
  127. ResultSet r3=null;
  128. boolean stop=false;
  129. do{
  130. r1=deal2Share("share1");
  131. stop=this.createIndex(r1,ls,"7"); //给数据库创建索引,此处执行一次,不要每次运行都创建索引,以后数据有更新可以后台调用更新索引
  132. if(!stop){
  133. ls.commit();//加个判断条件
  134. }
  135. //System.out.println("stop"+stop);
  136. }while(!stop);
  137. stop=false;
  138. do{
  139. r2=deal2Share("share2");
  140. stop=this.createIndex(r2,ls,"8"); //给数据库创建索引,此处执行一次,不要每次运行都创建索引,以后数据有更新可以后台调用更新索引
  141. if(!stop){
  142. ls.commit();//加个判断条件
  143. }
  144. }while(!stop);
  145. stop=false;
  146. do{
  147. r3=deal3Share("share3");
  148. stop=this.createIndex(r3,ls,"9"); //给数据库创建索引,此处执行一次,不要每次运行都创建索引,以后数据有更新可以后台调用更新索引
  149. if(!stop){
  150. ls.commit();//加个判断条件
  151. }
  152. }while(!stop);
  153. stop=false;
  154. }
  155. public void update2ShareTable(String tableName) throws SQLException{
  156. Statement st = (Statement) conn.createStatement();
  157. String sql = "update "+tableName+" set FS_ID=0 where STATUS=1 and FS_ID ='1' limit "+NUM; //利用FS_ID这个字段,没什么用处
  158. //System.out.println("update"+sql);
  159. try {
  160. st.executeUpdate(sql);
  161. } catch (SQLException e) {
  162. e.printStackTrace();
  163. }
  164. }
  165. public void update3ShareTable(String tableName) throws SQLException{
  166. Statement st = (Statement) conn.createStatement();
  167. String sql = "update "+tableName+" set name=0 where name ='1' limit "+NUM;
  168. //System.out.println("update"+sql);
  169. try {
  170. st.executeUpdate(sql);
  171. } catch (SQLException e) {
  172. e.printStackTrace();
  173. }
  174. }
  175. public boolean createIndex(ResultSet rs,LuceneSearch ls,String mark) {
  176. try {
  177. String tableName=null;
  178. if(mark.equals("1")){
  179. tableName="film_and_tv_info";
  180. }
  181. if(mark.equals("2")){
  182. tableName="music_and_mv_info";
  183. }
  184. if(mark.equals("3")){
  185. tableName="e_book_info";
  186. }
  187. if(mark.equals("4")){
  188. tableName="bt_file_info";
  189. }
  190. if(mark.equals("5")){
  191. tableName="characteristic_software_info";
  192. }
  193. if(mark.equals("6")){
  194. tableName="source_code_info";
  195. }
  196. if(mark.equals("7")){
  197. tableName="share1";
  198. }
  199. if(mark.equals("8")){
  200. tableName="share2";
  201. }
  202. if(mark.equals("9")){
  203. tableName="share3";
  204. }
  205. boolean isNull=rs.next();
  206. //System.out.println("hehe"+isNull);
  207. if(isNull==false){
  208. return true;//处理完毕
  209. }
  210. while(isNull){
  211. if(Integer.parseInt(mark)>=1&&Integer.parseInt(mark)<=8){
  212. Document doc = new Document();
  213. //System.out.println("name"+rs.getString("NAME"));
  214. Field name = new Field("name",rs.getString("NAME"),Field.Store.YES,Field.Index.ANALYZED);
  215. String skName=rs.getString("SKYDRIVER_NAME");
  216. if(skName==null){
  217. skName="百度";
  218. }
  219. Field skydirverName = new Field("skydirverName",skName, Field.Store.YES,Field.Index.NOT_ANALYZED);
  220. Field username = new Field("username",rs.getString("USERNAME"),Field.Store.YES, Field.Index.ANALYZED);
  221. Field shareTime = new Field("shareTime",rs.getString("SHARE_TIME"), Field.Store.YES,Field.Index.NOT_ANALYZED);
  222. String desb=rs.getString("DESCRIB");
  223. if(desb==null){
  224. desb="-1";
  225. }
  226. Field describ = new Field("describ",desb,Field.Store.NO,Field.Index.NOT_ANALYZED);
  227. Field typeId = new Field("typeId",rs.getString("TYPE_ID"), Field.Store.YES,Field.Index.NOT_ANALYZED);
  228. Field id = new Field("id",rs.getString("ID"),Field.Store.YES,Field.Index.NOT_ANALYZED);
  229. Field url =null;
  230. if(Integer.parseInt(mark)>=7&&Integer.parseInt(mark)<=8){
  231. url = new Field("url",rs.getString("SHORTURL"), Field.Store.YES,Field.Index.ANALYZED);
  232. }
  233. else{
  234. url = new Field("url",rs.getString("URL"), Field.Store.YES,Field.Index.ANALYZED);
  235. }
  236. doc.add(name);
  237. doc.add(skydirverName);
  238. doc.add(username);
  239. doc.add(shareTime);
  240. doc.add(describ);
  241. doc.add(typeId);
  242. doc.add(id);
  243. doc.add(url);
  244. ls.singleUpdate(doc);//用跟新更为合适
  245. isNull=rs.next();
  246. }
  247. else{
  248. Document doc = new Document();
  249. //System.out.println("title"+rs.getString("title"));
  250. Field name = new Field("name",rs.getString("title"),Field.Store.YES,Field.Index.ANALYZED);
  251. String skName=rs.getString("channel");
  252. Field skydirverName = new Field("skydirverName",skName, Field.Store.YES,Field.Index.NOT_ANALYZED);
  253. Field username = new Field("username",rs.getString("uid"),Field.Store.YES, Field.Index.ANALYZED);
  254. Field shareTime = new Field("shareTime",rs.getString("ctime"), Field.Store.YES,Field.Index.NOT_ANALYZED);
  255. String desb=rs.getString("description");
  256. if(desb==null){
  257. desb="-1";
  258. }
  259. Field describ = new Field("describ",desb,Field.Store.NO,Field.Index.NOT_ANALYZED);
  260. Field typeId = new Field("typeId",rs.getString("port"), Field.Store.YES,Field.Index.NOT_ANALYZED);
  261. Field id = new Field("id",rs.getString("id"),Field.Store.YES,Field.Index.NOT_ANALYZED);
  262. Field url = new Field("url",rs.getString("shorturl"), Field.Store.YES,Field.Index.ANALYZED);
  263. doc.add(name);
  264. doc.add(skydirverName);
  265. doc.add(username);
  266. doc.add(shareTime);
  267. doc.add(describ);
  268. doc.add(typeId);
  269. doc.add(id);
  270. doc.add(url);
  271. ls.singleUpdate(doc);//用跟新更为合适
  272. isNull=rs.next();
  273. }
  274. count=count+1;
  275. }
  276. if(Integer.parseInt(mark)>=1&&Integer.parseInt(mark)<=6){
  277. update6SourceTable(tableName);//处理完成后做标志
  278. }
  279. else if(Integer.parseInt(mark)>=7&&Integer.parseInt(mark)<=8){
  280. update2ShareTable(tableName);//处理完成后做标志
  281. }
  282. else{
  283. update3ShareTable(tableName);//处理完成后做标志
  284. }
  285. System.out.println("Has index "+count+"条数据,数据来自表"+tableName);
  286. } catch (Exception e) {
  287. e.printStackTrace();
  288. }
  289. return false;
  290. }
复制代码

}
数据库相关的细节请不要关心,看思路即可;如有需要,换成你自己的数据库和表结构即可,这里就不多说了。

看最后的部分:

package com.tray.indexData;

import java.sql.SQLException;

public class Application {

  1. public static void main(String[] args){
  2. /*IndexFile indexFile=new IndexFile();
  3. indexFile.indexInit();
  4. try {
  5. indexFile.Index6Data();
  6. } catch (SQLException e1) {
  7. e1.printStackTrace();
  8. }
  9. indexFile.indexEnd();*/
  10. IndexFile indexFile1=new IndexFile();
  11. indexFile1.indexInit();
  12. try {
  13. indexFile1.Index3Data();
  14. } catch (SQLException e1) {
  15. e1.printStackTrace();
  16. }
  17. indexFile1.indexEnd();
  18. LuceneSearch lch=new LuceneSearch();
  19. try {
  20. long a = System.currentTimeMillis();
  21. lch.highLightSearch("name", "flv", 1,3);
  22. long b = System.currentTimeMillis();
  23. long c = b - a;
  24. System.out.println("[高级检索花费时间:" + c + "毫秒]");
  25. } catch (Exception e) {
  26. e.printStackTrace();
  27. }
  28. }
复制代码

}
你可以在一个 Application 程序中开始索引,也可以写个定时器来定时索引,看需求。以上代码是楼主辛苦的作品,转载请不要改动,本人确保代码完全可用。

最新评论

小黑屋|在路上 ( 蜀ICP备15035742号-1 

;

GMT+8, 2025-5-6 15:40

Copyright 2015-2025 djqfx

返回顶部