r1687 - xwiki-apps/gelc/gelcv1/trunk/gelcplugins/src/main/java/net/jkraemer/xwiki/plugins/lucene
congruent
congruent at users.forge.objectweb.org
Wed Dec 6 17:50:25 CET 2006
Author: congruent
Date: 2006-12-06 17:50:25 +0100 (Wed, 06 Dec 2006)
New Revision: 1687
Modified:
xwiki-apps/gelc/gelcv1/trunk/gelcplugins/src/main/java/net/jkraemer/xwiki/plugins/lucene/IndexRebuilder.java
xwiki-apps/gelc/gelcv1/trunk/gelcplugins/src/main/java/net/jkraemer/xwiki/plugins/lucene/IndexUpdater.java
xwiki-apps/gelc/gelcv1/trunk/gelcplugins/src/main/java/net/jkraemer/xwiki/plugins/lucene/LucenePlugin.java
xwiki-apps/gelc/gelcv1/trunk/gelcplugins/src/main/java/net/jkraemer/xwiki/plugins/lucene/ObjectData.java
xwiki-apps/gelc/gelcv1/trunk/gelcplugins/src/main/java/net/jkraemer/xwiki/plugins/lucene/TextExtractor.java
Log:
CURRIKI-10 Support XWiki Objects as searchable fields in Lucene Search
Modified: xwiki-apps/gelc/gelcv1/trunk/gelcplugins/src/main/java/net/jkraemer/xwiki/plugins/lucene/IndexRebuilder.java
===================================================================
--- xwiki-apps/gelc/gelcv1/trunk/gelcplugins/src/main/java/net/jkraemer/xwiki/plugins/lucene/IndexRebuilder.java 2006-12-06 15:49:47 UTC (rev 1686)
+++ xwiki-apps/gelc/gelcv1/trunk/gelcplugins/src/main/java/net/jkraemer/xwiki/plugins/lucene/IndexRebuilder.java 2006-12-06 16:50:25 UTC (rev 1687)
@@ -140,9 +140,10 @@
*/
private int addObjectsOfDocument(XWikiDocument document, XWikiContext wikiContext) {
int retval = 0;
+ Map xwikiObjects = document.getxWikiObjects();
if (document.hasElement(XWikiDocument.HAS_OBJECTS)) {
+ retval += xwikiObjects.size();
indexUpdater.addObject(document, wikiContext);
- retval++;
}
return retval;
}
Modified: xwiki-apps/gelc/gelcv1/trunk/gelcplugins/src/main/java/net/jkraemer/xwiki/plugins/lucene/IndexUpdater.java
===================================================================
--- xwiki-apps/gelc/gelcv1/trunk/gelcplugins/src/main/java/net/jkraemer/xwiki/plugins/lucene/IndexUpdater.java 2006-12-06 15:49:47 UTC (rev 1686)
+++ xwiki-apps/gelc/gelcv1/trunk/gelcplugins/src/main/java/net/jkraemer/xwiki/plugins/lucene/IndexUpdater.java 2006-12-06 16:50:25 UTC (rev 1687)
@@ -24,11 +24,7 @@
import java.io.File;
import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Collection;
-import java.util.Iterator;
-import java.util.List;
-import java.util.Properties;
+import java.util.*;
import org.apache.commons.lang.StringUtils;
import org.apache.log4j.Logger;
@@ -40,6 +36,7 @@
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.store.FSDirectory;
+import org.apache.lucene.document.Field;
import com.xpn.xwiki.XWiki;
import com.xpn.xwiki.XWikiContext;
@@ -54,109 +51,98 @@
* @author <a href="mailto:jk at jkraemer.net">Jens Krämer </a>
*/
public class IndexUpdater implements Runnable, XWikiDocChangeNotificationInterface,
- XWikiActionNotificationInterface
-{
+ XWikiActionNotificationInterface {
- private static final Logger LOG = Logger.getLogger (IndexUpdater.class);
+ private static final Logger LOG = Logger.getLogger(IndexUpdater.class);
- /** Milliseconds of sleep between checks for changed documents */
- private int indexingInterval = 300000;
- private boolean exit = false;
- private IndexWriter writer;
- private String indexDir;
- private XWikiDocumentQueue queue = new XWikiDocumentQueue ();
- private Analyzer analyzer;
- private LucenePlugin plugin;
- private IndexSearcher searcher;
- private IndexReader reader;
+ /**
+ * Milliseconds of sleep between checks for changed documents
+ */
+ private int indexingInterval = 300000;
+ private boolean exit = false;
+ private IndexWriter writer;
+ private String indexDir;
+ private XWikiDocumentQueue queue = new XWikiDocumentQueue();
+ private Analyzer analyzer;
+ private LucenePlugin plugin;
+ private IndexSearcher searcher;
+ private IndexReader reader;
- private XWikiContext context;
- private XWiki xwiki;
+ private XWikiContext context;
+ private XWiki xwiki;
- public void doExit ()
- {
+ static List fields = new ArrayList();
+
+
+ public void doExit() {
exit = true;
}
/**
* Main loop. Polls the queue for documents to be indexed.
+ *
* @see java.lang.Runnable#run()
*/
- public void run ()
- {
- MDC.put ("url", "index updating thread");
+ public void run() {
+ MDC.put("url", "index updating thread");
- while (!exit)
- {
- if (queue.isEmpty ())
- {
- if (LOG.isDebugEnabled ())
- {
- LOG.debug ("IndexUpdater: queue empty, nothing to do");
+ while (!exit) {
+ if (queue.isEmpty()) {
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("IndexUpdater: queue empty, nothing to do");
}
- } else
- {
- if (LOG.isDebugEnabled ())
- {
- LOG.debug ("IndexUpdater: documents in queue, start indexing");
+ } else {
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("IndexUpdater: documents in queue, start indexing");
}
// we need a searcher to find old versions of documents
- openSearcher ();
- openWriter (false);
- List oldDocs = new ArrayList ();
+ openSearcher();
+ openWriter(false);
+ List oldDocs = new ArrayList();
- while (!queue.isEmpty ())
- {
- IndexData data = queue.remove ();
+ while (!queue.isEmpty()) {
+ IndexData data = queue.remove();
- try
- {
- oldDocs.addAll (getOldIndexDocIds (data));
- XWikiDocument doc = xwiki.getDocument (data.getFullName (), context);
- addToIndex (data, doc, context);
- } catch (Exception e)
- {
- LOG.error ("error retrieving doc from own context: " + e.getMessage (), e);
- e.printStackTrace ();
+ try {
+ oldDocs.addAll(getOldIndexDocIds(data));
+ XWikiDocument doc = xwiki.getDocument(data.getFullName(), context);
+ addToIndex(data, doc, context);
+ } catch (Exception e) {
+ LOG.error("error retrieving doc from own context: " + e.getMessage(), e);
+ e.printStackTrace();
}
}
- closeWriter ();
+ closeWriter();
// the following searcher close/open cycle is necessary because
// the old reader is no longer valid for document deletion
// once the index has been updated, so a fresh reader must be
// opened before deleting the old documents
- closeSearcher ();
- openSearcher ();
- deleteOldDocs (oldDocs);
- closeSearcher ();
+ closeSearcher();
+ openSearcher();
+ deleteOldDocs(oldDocs);
+ closeSearcher();
// readers and searchers should be reopened after index update
- plugin.openSearchers ();
+ plugin.openSearchers();
}
- try
- {
- Thread.sleep (indexingInterval);
- } catch (InterruptedException e)
- {
+ try {
+ Thread.sleep(indexingInterval);
+ } catch (InterruptedException e) {
// TODO Auto-generated catch block
- e.printStackTrace ();
+ e.printStackTrace();
}
}
- xwiki.getStore ().cleanUp (context);
- MDC.remove ("url");
+ xwiki.getStore().cleanUp(context);
+ MDC.remove("url");
}
- private synchronized void closeSearcher ()
- {
- try
- {
- if (searcher != null) searcher.close ();
- if (reader != null) reader.close ();
- } catch (IOException e)
- {
- LOG.error ("error closing index searcher", e);
- e.printStackTrace ();
- } finally
- {
+ private synchronized void closeSearcher() {
+ try {
+ if (searcher != null) searcher.close();
+ if (reader != null) reader.close();
+ } catch (IOException e) {
+ LOG.error("error closing index searcher", e);
+ e.printStackTrace();
+ } finally {
searcher = null;
reader = null;
}
@@ -166,39 +152,32 @@
* Opens the index reader and searcher used for finding and deleting old
* versions of indexed documents.
*/
- private synchronized void openSearcher ()
- {
- try
- {
- reader = IndexReader.open (indexDir);
- searcher = new IndexSearcher (reader);
- } catch (IOException e)
- {
- LOG.error ("error opening index searcher", e);
- e.printStackTrace ();
+ private synchronized void openSearcher() {
+ try {
+ reader = IndexReader.open(indexDir);
+ searcher = new IndexSearcher(reader);
+ } catch (IOException e) {
+ LOG.error("error opening index searcher", e);
+ e.printStackTrace();
}
}
/**
* Deletes the documents with the given ids from the index.
+ *
* @param oldDocs
*/
- private void deleteOldDocs (List oldDocs)
- {
- for (Iterator iter = oldDocs.iterator (); iter.hasNext ();)
- {
- Integer id = (Integer) iter.next ();
- if (LOG.isDebugEnabled ())
- {
- LOG.debug ("delete doc " + id);
+ private void deleteOldDocs(List oldDocs) {
+ for (Iterator iter = oldDocs.iterator(); iter.hasNext();) {
+ Integer id = (Integer) iter.next();
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("delete doc " + id);
}
- try
- {
- reader.deleteDocument (id.intValue ());
- } catch (IOException e1)
- {
- LOG.error ("error deleting doc " + id, e1);
- e1.printStackTrace ();
+ try {
+ reader.deleteDocument(id.intValue());
+ } catch (IOException e1) {
+ LOG.error("error deleting doc " + id, e1);
+ e1.printStackTrace();
}
}
}
@@ -207,80 +186,64 @@
* @param data
* @return
*/
- private Collection getOldIndexDocIds (IndexData data)
- {
- List retval = new ArrayList (3);
- Query query = data.buildQuery ();
- try
- {
- Hits hits = searcher.search (query);
- for (int i = 0; i < hits.length (); i++)
- {
- retval.add (new Integer (hits.id (i)));
+ private Collection getOldIndexDocIds(IndexData data) {
+ List retval = new ArrayList(3);
+ Query query = data.buildQuery();
+ try {
+ Hits hits = searcher.search(query);
+ for (int i = 0; i < hits.length(); i++) {
+ retval.add(new Integer(hits.id(i)));
}
- } catch (IOException e)
- {
- LOG.error ("error looking for old versions of document " + data + " with query " + query, e);
- e.printStackTrace ();
+ } catch (IOException e) {
+ LOG.error("error looking for old versions of document " + data + " with query " + query, e);
+ e.printStackTrace();
}
return retval;
}
/**
- *
+ *
*/
- private void openWriter (boolean create)
- {
- if (writer != null)
- {
- LOG.error ("Writer already open and createWriter called");
+ private void openWriter(boolean create) {
+ if (writer != null) {
+ LOG.error("Writer already open and createWriter called");
return;
}
- try
- {
+ try {
// fix for windows by Daniel Cortes:
- FSDirectory f = FSDirectory.getDirectory(indexDir,false);
- writer = new IndexWriter (f, analyzer, create);
+ FSDirectory f = FSDirectory.getDirectory(indexDir, false);
+ writer = new IndexWriter(f, analyzer, create);
//writer = new IndexWriter (indexDir, analyzer, create);
- writer.setUseCompoundFile (true);
- if (LOG.isDebugEnabled ())
- {
- LOG.debug ("successfully opened index writer : " + indexDir);
+ writer.setUseCompoundFile(true);
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("successfully opened index writer : " + indexDir);
}
- } catch (IOException e)
- {
- LOG.error ("IOException when opening Lucene Index for writing at " + indexDir, e);
+ } catch (IOException e) {
+ LOG.error("IOException when opening Lucene Index for writing at " + indexDir, e);
}
}
/**
- *
+ *
*/
- private void closeWriter ()
- {
- if (writer == null)
- {
- LOG.error ("Writer not open and closeWriter called");
+ private void closeWriter() {
+ if (writer == null) {
+ LOG.error("Writer not open and closeWriter called");
return;
}
- try
- {
- writer.optimize ();
- } catch (IOException e1)
- {
- LOG.error ("Exception caught when optimizing Index", e1);
+ try {
+ writer.optimize();
+ } catch (IOException e1) {
+ LOG.error("Exception caught when optimizing Index", e1);
}
- try
- {
- writer.close ();
- } catch (Exception e)
- {
- LOG.error ("Exception caught when closing IndexWriter", e);
+ try {
+ writer.close();
+ } catch (Exception e) {
+ LOG.error("Exception caught when closing IndexWriter", e);
}
writer = null;
- if (LOG.isDebugEnabled ())
- {
- LOG.debug ("closed writer.");
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("closed writer.");
}
}
@@ -289,119 +252,109 @@
* @param doc
* @throws IOException
*/
- private void addToIndex (IndexData data, XWikiDocument doc, XWikiContext context) throws IOException
- {
- if (LOG.isDebugEnabled ())
- {
- LOG.debug ("addToIndex: " + data);
+ private void addToIndex(IndexData data, XWikiDocument doc, XWikiContext context) throws IOException {
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("addToIndex: " + data);
}
- org.apache.lucene.document.Document luceneDoc = new org.apache.lucene.document.Document ();
- data.addDataToLuceneDocument (luceneDoc, doc, context);
- writer.addDocument (luceneDoc);
+ org.apache.lucene.document.Document luceneDoc = new org.apache.lucene.document.Document();
+ data.addDataToLuceneDocument(luceneDoc, doc, context);
+ Field fld = null;
+ // collect the names of all indexed fields so they can be used when searching
+ for (Enumeration e = luceneDoc.fields(); e.hasMoreElements();) {
+ fld = (Field) e.nextElement();
+ if (!fields.contains(fld.name())) {
+ fields.add(fld.name());
+ }
+ }
+ writer.addDocument(luceneDoc);
}
/**
- * @param indexDir
- * The indexDir to set.
+ * @param indexDir The indexDir to set.
*/
- public void setIndexDir (String indexDir)
- {
+ public void setIndexDir(String indexDir) {
this.indexDir = indexDir;
}
/**
- * @param analyzer
- * The analyzer to set.
+ * @param analyzer The analyzer to set.
*/
- public void setAnalyzer (Analyzer analyzer)
- {
+ public void setAnalyzer(Analyzer analyzer) {
this.analyzer = analyzer;
}
/**
* @param config
*/
- public synchronized void init (Properties config, LucenePlugin plugin, XWiki wiki)
- {
+ public synchronized void init(Properties config, LucenePlugin plugin, XWiki wiki) {
this.xwiki = wiki;
- this.context = new XWikiContext ();
- this.context.setWiki (xwiki);
- this.context.setDatabase (xwiki.getDatabase ());
+ this.context = new XWikiContext();
+ this.context.setWiki(xwiki);
+ this.context.setDatabase(xwiki.getDatabase());
this.plugin = plugin;
// take the first configured index dir as the one for writing
- String[] indexDirs = StringUtils.split (config.getProperty (LucenePlugin.PROP_INDEX_DIR), " ,");
- if (indexDirs != null && indexDirs.length > 0)
- {
+ String[] indexDirs = StringUtils.split(config.getProperty(LucenePlugin.PROP_INDEX_DIR), " ,");
+ if (indexDirs != null && indexDirs.length > 0) {
this.indexDir = indexDirs[0];
- File f = new File (indexDir);
- if (!f.isDirectory ())
- {
- f.mkdirs ();
- cleanIndex ();
+ File f = new File(indexDir);
+ if (!f.isDirectory()) {
+ f.mkdirs();
+ cleanIndex();
}
}
- indexingInterval = 1000 * Integer.parseInt (config.getProperty (LucenePlugin.PROP_INDEXING_INTERVAL,
- "300"));
- openSearcher ();
+ indexingInterval = 1000 * Integer.parseInt(config.getProperty(LucenePlugin.PROP_INDEXING_INTERVAL,
+ "300"));
+ openSearcher();
}
/**
- *
+ *
*/
- public void cleanIndex ()
- {
- LOG.info ("trying to clear index for rebuilding");
- while (writer != null)
- {
- if (LOG.isDebugEnabled ())
- {
- LOG.debug ("waiting for existing index writer to close");
+ public void cleanIndex() {
+ LOG.info("trying to clear index for rebuilding");
+ while (writer != null) {
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("waiting for existing index writer to close");
}
- try
- {
- Thread.sleep (1000);
- } catch (InterruptedException e)
- {
+ try {
+ Thread.sleep(1000);
+ } catch (InterruptedException e) {
// TODO Auto-generated catch block
- e.printStackTrace ();
+ e.printStackTrace();
}
}
- synchronized (this)
- {
- openWriter (true);
- closeWriter ();
+ synchronized (this) {
+ openWriter(true);
+ closeWriter();
}
}
/**
* @param document
*/
- public void add (XWikiDocument document, XWikiContext context)
- {
- queue.add (new DocumentData (document, context));
+ public void add(XWikiDocument document, XWikiContext context) {
+ queue.add(new DocumentData(document, context));
if (document.hasElement(XWikiDocument.HAS_OBJECTS)) {
addObject(document, context);
}
}
- /**
+ /**
* @param document
- * @param context
+ * @param context
*/
- public void addObject(XWikiDocument document, XWikiContext context )
- {
- queue.add (new ObjectData (document, context));
+ public void addObject(XWikiDocument document, XWikiContext context) {
+ queue.add(new ObjectData(document, context));
}
/**
* @param attachment
*/
- public void add (XWikiDocument document, XWikiAttachment attachment, XWikiContext context)
- {
+ public void add(XWikiDocument document, XWikiAttachment attachment, XWikiContext context) {
if (document != null && attachment != null && context != null)
- queue.add (new AttachmentData (document, attachment, context));
+ queue.add(new AttachmentData(document, attachment, context));
else
- LOG.error ("invalid parameters given to add: " + document + ", " + attachment + ", " + context);
+ LOG.error("invalid parameters given to add: " + document + ", " + attachment + ", " + context);
}
@@ -423,59 +376,51 @@
/**
* Notification of changes in document content
+ *
* @see com.xpn.xwiki.notify.XWikiNotificationInterface#notify(com.xpn.xwiki.notify.XWikiNotificationRule,
- * com.xpn.xwiki.doc.XWikiDocument, com.xpn.xwiki.doc.XWikiDocument,
- * int, com.xpn.xwiki.XWikiContext)
+ *com.xpn.xwiki.doc.XWikiDocument,com.xpn.xwiki.doc.XWikiDocument,
+ *int,com.xpn.xwiki.XWikiContext)
*/
- public void notify (XWikiNotificationRule rule, XWikiDocument newDoc, XWikiDocument oldDoc, int event,
- XWikiContext context)
- {
- if (LOG.isDebugEnabled ())
- {
- LOG.debug ("notify from XWikiDocChangeNotificationInterface, event=" + event + ", newDoc="
+ public void notify(XWikiNotificationRule rule, XWikiDocument newDoc, XWikiDocument oldDoc, int event,
+ XWikiContext context) {
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("notify from XWikiDocChangeNotificationInterface, event=" + event + ", newDoc="
+ newDoc + " oldDoc=" + oldDoc);
}
- try
- {
- add (newDoc, context);
- } catch (Exception e)
- {
- LOG.error ("error in notify", e);
+ try {
+ add(newDoc, context);
+ } catch (Exception e) {
+ LOG.error("error in notify", e);
}
}
/**
* Notification of attachment uploads.
+ *
* @see com.xpn.xwiki.notify.XWikiActionNotificationInterface#notify(com.xpn.xwiki.notify.XWikiNotificationRule,
- * com.xpn.xwiki.doc.XWikiDocument, java.lang.String,
- * com.xpn.xwiki.XWikiContext)
+ *com.xpn.xwiki.doc.XWikiDocument,java.lang.String,
+ *com.xpn.xwiki.XWikiContext)
*/
- public void notify (XWikiNotificationRule arg0, XWikiDocument doc, String action, XWikiContext context)
- {
- if ("upload".equals (action))
- {
- if (LOG.isDebugEnabled ())
- {
- LOG.debug ("upload action notification for doc " + doc.getName ());
+ public void notify(XWikiNotificationRule arg0, XWikiDocument doc, String action, XWikiContext context) {
+ if ("upload".equals(action)) {
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("upload action notification for doc " + doc.getName());
}
- try
- {
- List attachments = doc.getAttachmentList ();
+ try {
+ List attachments = doc.getAttachmentList();
// find out the most recently changed attachment
XWikiAttachment newestAttachment = null;
- for (Iterator iter = attachments.iterator (); iter.hasNext ();)
- {
- XWikiAttachment attachment = (XWikiAttachment) iter.next ();
+ for (Iterator iter = attachments.iterator(); iter.hasNext();) {
+ XWikiAttachment attachment = (XWikiAttachment) iter.next();
if (newestAttachment != null
- && attachment.getDate ().before (newestAttachment.getDate ()))
+ && attachment.getDate().before(newestAttachment.getDate()))
newestAttachment = attachment;
else
newestAttachment = attachment;
}
- add (doc, newestAttachment, context);
- } catch (Exception e)
- {
- LOG.error ("error in notify", e);
+ add(doc, newestAttachment, context);
+ } catch (Exception e) {
+ LOG.error("error in notify", e);
}
}
}
Modified: xwiki-apps/gelc/gelcv1/trunk/gelcplugins/src/main/java/net/jkraemer/xwiki/plugins/lucene/LucenePlugin.java
===================================================================
--- xwiki-apps/gelc/gelcv1/trunk/gelcplugins/src/main/java/net/jkraemer/xwiki/plugins/lucene/LucenePlugin.java 2006-12-06 15:49:47 UTC (rev 1686)
+++ xwiki-apps/gelc/gelcv1/trunk/gelcplugins/src/main/java/net/jkraemer/xwiki/plugins/lucene/LucenePlugin.java 2006-12-06 16:50:25 UTC (rev 1687)
@@ -34,6 +34,7 @@
import org.apache.lucene.index.Term;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
+import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.search.*;
import com.xpn.xwiki.XWikiContext;
@@ -46,278 +47,255 @@
import com.xpn.xwiki.plugin.XWikiDefaultPlugin;
import com.xpn.xwiki.plugin.XWikiPluginInterface;
-public class LucenePlugin extends XWikiDefaultPlugin implements XWikiPluginInterface
-{
- public static final String DOCTYPE_WIKIPAGE = "wikipage";
- public static final String DOCTYPE_ATTACHMENT = "attachment";
+public class LucenePlugin extends XWikiDefaultPlugin implements XWikiPluginInterface {
+ public static final String DOCTYPE_WIKIPAGE = "wikipage";
+ public static final String DOCTYPE_ATTACHMENT = "attachment";
- private static final Logger LOG = Logger.getLogger (LucenePlugin.class);
- private Analyzer analyzer;
- private IndexUpdater indexUpdater;
- private Thread indexUpdaterThread;
- protected Properties config;
+ private static final Logger LOG = Logger.getLogger(LucenePlugin.class);
+ private Analyzer analyzer;
+ private IndexUpdater indexUpdater;
+ private Thread indexUpdaterThread;
+ protected Properties config;
- public static final String PROP_INDEX_DIR = "xwiki.plugins.lucene.indexdir";
- public static final String PROP_ANALYZER = "xwiki.plugins.lucene.analyzer";
- public static final String PROP_INDEXING_INTERVAL = "xwiki.plugins.lucene.indexinterval";
+ public static final String PROP_INDEX_DIR = "xwiki.plugins.lucene.indexdir";
+ public static final String PROP_ANALYZER = "xwiki.plugins.lucene.analyzer";
+ public static final String PROP_INDEXING_INTERVAL = "xwiki.plugins.lucene.indexinterval";
- private static final String DEFAULT_ANALYZER = "org.apache.lucene.analysis.de.GermanAnalyzer";
- private Searcher[] searchers;
- private String indexDirs;
- private IndexRebuilder indexRebuilder;
+ private static final String DEFAULT_ANALYZER = "org.apache.lucene.analysis.de.GermanAnalyzer";
+ private Searcher[] searchers;
+ private String indexDirs;
+ private IndexRebuilder indexRebuilder;
- public LucenePlugin (String name, String className, XWikiContext context)
- {
- super (name, className, context);
- init (context);
+ public LucenePlugin(String name, String className, XWikiContext context) {
+ super(name, className, context);
+ init(context);
}
/**
* @see java.lang.Object#finalize()
*/
- protected void finalize () throws Throwable
- {
- if (indexUpdater != null) indexUpdater.doExit ();
- super.finalize ();
+ protected void finalize() throws Throwable {
+ if (indexUpdater != null) indexUpdater.doExit();
+ super.finalize();
}
- public synchronized int rebuildIndex (com.xpn.xwiki.api.XWiki wiki, XWikiContext context)
- {
- return indexRebuilder.rebuildIndex (wiki, context);
+ public synchronized int rebuildIndex(com.xpn.xwiki.api.XWiki wiki, XWikiContext context) {
+ return indexRebuilder.rebuildIndex(wiki, context);
}
/**
* Allows searching special named lucene indexes without having to configure
* them in xwiki.cfg. Slower than {@link #getSearchResults} since
* new index searcher instances are created for every query.
- * @param query
- * query string
- * @param myIndexDirs
- * comma separated list of directories containing the lucene
- * indexes to search.
- * @param languages
- * comma separated list of language codes to search in, may be
- * null to search all languages
+ *
+ * @param query query string
+ * @param myIndexDirs comma separated list of directories containing the lucene
+ * indexes to search.
+ * @param languages comma separated list of language codes to search in, may be
+ * null to search all languages
* @param wiki
* @return
* @throws Exception
*/
- public SearchResults getSearchResultsFromIndexes (String query, String myIndexDirs, String languages,
- XWiki wiki) throws Exception
- {
- Searcher[] mySearchers = createSearchers (myIndexDirs);
- SearchResults retval = search (query, null, languages, mySearchers, wiki);
- closeSearchers (mySearchers);
+ public SearchResults getSearchResultsFromIndexes(String query, String myIndexDirs, String languages,
+ XWiki wiki) throws Exception {
+ Searcher[] mySearchers = createSearchers(myIndexDirs);
+ SearchResults retval = search(query, null, languages, mySearchers, wiki);
+ closeSearchers(mySearchers);
return retval;
}
/**
* Searches all Indexes configured in xwiki.cfg (property
* <code>xwiki.plugins.lucene.indexdir</code>)
- * @param query
- * query String entered into a search form
- * @param wiki
- * XWiki
- * @param virtualWikiNames
- * Name of the virtual Wiki to search, global search when null
- * @param languages
- * comma separated list of language codes to search in, may be
- * null to search all languages
+ *
+ * @param query query String entered into a search form
+ * @param wiki XWiki
+ * @param virtualWikiNames Name of the virtual Wiki to search, global search when null
+ * @param languages comma separated list of language codes to search in, may be
+ * null to search all languages
* @return Searchresults as a collection of Maps
- * @throws Exception
- * in case of error(s)
+ * @throws Exception in case of error(s)
*/
- public SearchResults getSearchResults (String query, String virtualWikiNames, String languages, XWiki wiki)
- throws Exception
- {
- return search (query, virtualWikiNames, languages, this.searchers, wiki);
+ public SearchResults getSearchResults(String query, String virtualWikiNames, String languages, XWiki wiki)
+ throws Exception {
+ return search(query, virtualWikiNames, languages, this.searchers, wiki);
}
/**
* @param query
* @param indexes
- * @param virtualWikiNames
- * comma separated list of virtual wiki names to search in, may
- * be null to search all virtual wikis
- * @param languages
- * comma separated list of language codes to search in, may be
- * null to search all languages
+ * @param virtualWikiNames comma separated list of virtual wiki names to search in, may
+ * be null to search all virtual wikis
+ * @param languages comma separated list of language codes to search in, may be
+ * null to search all languages
* @return
* @throws IOException
* @throws ParseException
*/
- private SearchResults search (String query, String virtualWikiNames, String languages,
- Searcher[] indexes, XWiki wiki) throws IOException, ParseException
- {
- MultiSearcher searcher = new MultiSearcher (indexes);
- Query q = buildQuery (query, virtualWikiNames, languages);
- Hits hits = searcher.search (q);
- final int hitcount = hits.length ();
- if (LOG.isDebugEnabled ()) LOG.debug ("query " + q + " returned " + hitcount + " hits");
- return new SearchResults (hits, wiki);
+ private SearchResults search(String query, String virtualWikiNames, String languages,
+ Searcher[] indexes, XWiki wiki) throws IOException, ParseException {
+ MultiSearcher searcher = new MultiSearcher(indexes);
+ Query q = buildQuery(query, virtualWikiNames, languages);
+ Hits hits = searcher.search(q);
+ final int hitcount = hits.length();
+ if (LOG.isDebugEnabled()) LOG.debug("query " + q + " returned " + hitcount + " hits");
+ return new SearchResults(hits, wiki);
}
/**
* @param query
- * @param virtualWikiNames
- * comma separated list of virtual wiki names
- * @param languages
- * comma separated list of language codes to search in, may be
- * null to search all languages
+ * @param virtualWikiNames comma separated list of virtual wiki names
+ * @param languages comma separated list of language codes to search in, may be
+ * null to search all languages
* @throws ParseException
*/
- private Query buildQuery (String query, String virtualWikiNames, String languages) throws ParseException
- {
+ private Query buildQuery(String query, String virtualWikiNames, String languages) throws ParseException {
// build a query like this: <user query string> AND <wikiNamesQuery> AND
// <languageQuery>
- BooleanQuery bQuery = new BooleanQuery ();
- QueryParser qParser = new QueryParser(IndexFields.FULLTEXT, new StandardAnalyzer());
- qParser.setDefaultOperator(QueryParser.Operator.AND);
- Query parsedQuery = qParser.parse (query);
- bQuery.add (parsedQuery, BooleanClause.Occur.MUST);
- if (virtualWikiNames != null && virtualWikiNames.length () > 0)
- {
- bQuery.add (buildOredTermQuery (virtualWikiNames, IndexFields.DOCUMENT_WIKI), BooleanClause.Occur.MUST);
+ BooleanQuery bQuery = new BooleanQuery();
+ Query parsedQuery = null;
+
+ // for object search
+ if (query.contains(":")) {
+ String property = query.substring(0, query.indexOf(":"));
+ query = query.substring(query.indexOf(":") + 1, query.length());
+ QueryParser qp = new QueryParser(property, analyzer);
+ parsedQuery = qp.parse(query);
+ bQuery.add(parsedQuery, BooleanClause.Occur.MUST);
+ return bQuery;
}
- if (languages != null && languages.length () > 0)
- {
- bQuery.add (buildOredTermQuery (languages, IndexFields.DOCUMENT_LANGUAGE), BooleanClause.Occur.MUST);
+
+ //for fulltext search
+ List fieldList = IndexUpdater.fields;
+ String[] fields = (String[]) fieldList.toArray(new String[fieldList.size()]);
+ BooleanClause.Occur[] flags = new BooleanClause.Occur[fields.length];
+ for (int i = 0; i < flags.length; i++) {
+ flags[i] = BooleanClause.Occur.SHOULD;
}
+ parsedQuery = MultiFieldQueryParser.parse(query, fields, flags, analyzer);
+ bQuery.add(parsedQuery, BooleanClause.Occur.MUST);
+
+ if (virtualWikiNames != null && virtualWikiNames.length() > 0) {
+ bQuery.add(buildOredTermQuery(virtualWikiNames, IndexFields.DOCUMENT_WIKI), BooleanClause.Occur.SHOULD);
+ }
+ if (languages != null && languages.length() > 0) {
+ bQuery.add(buildOredTermQuery(languages, IndexFields.DOCUMENT_LANGUAGE), BooleanClause.Occur.SHOULD);
+ }
return bQuery;
}
/**
- * @param values
- * comma separated list of values to look for
+ * Builds a query matching documents whose <code>fieldname</code> field contains
+ * at least one of the given values. Every value is trimmed before it is turned
+ * into a term, regardless of whether one or several values are given.
+ *
+ * @param values comma separated list of values to look for
+ * @param fieldname name of the index field the values are matched against
* @return A query returning documents matching one of the given values in
* the given field
*/
- private Query buildOredTermQuery (final String values, final String fieldname)
- {
- String[] valueArray = values.split ("\\,");
- if (valueArray.length > 1)
- {
+ private Query buildOredTermQuery(final String values, final String fieldname) {
+ String[] valueArray = values.split("\\,");
+ if (valueArray.length > 1) {
// build a query like this: <valueArray[0]> OR <valueArray[1]> OR ...
- BooleanQuery orQuery = new BooleanQuery ();
- for (int i = 0; i < valueArray.length; i++)
- {
- orQuery.add(new TermQuery (new Term (fieldname, valueArray[i].trim ())), BooleanClause.Occur.SHOULD);
+ BooleanQuery orQuery = new BooleanQuery();
+ for (int i = 0; i < valueArray.length; i++) {
+ orQuery.add(new TermQuery(new Term(fieldname, valueArray[i].trim())), BooleanClause.Occur.SHOULD);
}
return orQuery;
}
// exactly one value, no OR'ed Terms necessary
- return new TermQuery (new Term (fieldname, valueArray[0]));
+ // trim it like the OR'ed values above, so " en" and "en" resolve to the same term
+ return new TermQuery(new Term(fieldname, valueArray[0].trim()));
}
- public synchronized void init (XWikiContext context)
- {
- super.init (context);
- if (LOG.isDebugEnabled ()) LOG.debug ("lucene plugin: in init");
- config = context.getWiki ().getConfig ();
- try
- {
- analyzer = (Analyzer) Class.forName (config.getProperty (PROP_ANALYZER, DEFAULT_ANALYZER))
- .newInstance ();
- } catch (Exception e)
- {
- e.printStackTrace ();
- LOG.error ("error instantiating analyzer : ", e);
- LOG.warn ("using default analyzer class: " + DEFAULT_ANALYZER);
- try
- {
- analyzer = (Analyzer) Class.forName (DEFAULT_ANALYZER).newInstance ();
- } catch (Exception e1)
- {
- e1.printStackTrace ();
- throw new RuntimeException ("instantiation of default analyzer " + DEFAULT_ANALYZER
+ public synchronized void init(XWikiContext context) {
+ super.init(context);
+ if (LOG.isDebugEnabled()) LOG.debug("lucene plugin: in init");
+ config = context.getWiki().getConfig();
+ try {
+ analyzer = (Analyzer) Class.forName(config.getProperty(PROP_ANALYZER, DEFAULT_ANALYZER))
+ .newInstance();
+ } catch (Exception e) {
+ e.printStackTrace();
+ LOG.error("error instantiating analyzer : ", e);
+ LOG.warn("using default analyzer class: " + DEFAULT_ANALYZER);
+ try {
+ analyzer = (Analyzer) Class.forName(DEFAULT_ANALYZER).newInstance();
+ } catch (Exception e1) {
+ e1.printStackTrace();
+ throw new RuntimeException("instantiation of default analyzer " + DEFAULT_ANALYZER
+ " failed", e1);
}
}
- this.indexDirs = config.getProperty (PROP_INDEX_DIR);
- openSearchers ();
- indexUpdater = new IndexUpdater ();
- indexUpdater.setAnalyzer (analyzer);
- indexUpdater.init (config, this, context.getWiki ());
- indexUpdaterThread = new Thread (indexUpdater);
- indexUpdaterThread.start ();
- indexRebuilder = new IndexRebuilder ();
- indexRebuilder.setIndexUpdater (indexUpdater);
- context.getWiki ().getNotificationManager ().addGeneralRule (new DocChangeRule (indexUpdater));
- context.getWiki ().getNotificationManager ().addGeneralRule (new XWikiActionRule (indexUpdater));
- LOG.info ("lucene plugin initialized.");
+ this.indexDirs = config.getProperty(PROP_INDEX_DIR);
+ openSearchers();
+ indexUpdater = new IndexUpdater();
+ indexUpdater.setAnalyzer(analyzer);
+ indexUpdater.init(config, this, context.getWiki());
+ indexUpdaterThread = new Thread(indexUpdater);
+ indexUpdaterThread.start();
+ indexRebuilder = new IndexRebuilder();
+ indexRebuilder.setIndexUpdater(indexUpdater);
+ context.getWiki().getNotificationManager().addGeneralRule(new DocChangeRule(indexUpdater));
+ context.getWiki().getNotificationManager().addGeneralRule(new XWikiActionRule(indexUpdater));
+ LOG.info("lucene plugin initialized.");
}
- public String getName ()
- {
+ public String getName() {
return "lucene";
}
- public Api getPluginApi (XWikiPluginInterface plugin, XWikiContext context)
- {
- return new LucenePluginApi ((LucenePlugin) plugin, context);
+ public Api getPluginApi(XWikiPluginInterface plugin, XWikiContext context) {
+ return new LucenePluginApi((LucenePlugin) plugin, context);
}
/**
* Creates an array of Searchers for a number of lucene indexes.
- * @param indexDirs
- * Comma separated list of Lucene index directories to create
- * searchers for.
+ *
+ * @param indexDirs Comma separated list of Lucene index directories to create
+ * searchers for.
* @return Array of searchers
* @throws Exception
*/
- private static Searcher[] createSearchers (String indexDirs) throws Exception
- {
- String[] dirs = StringUtils.split (indexDirs, " ,");
- List searchersList = new ArrayList ();
- for (int i = 0; i < dirs.length; i++)
- {
- try
- {
- IndexReader reader = IndexReader.open (dirs[i]);
- searchersList.add (new IndexSearcher (reader));
- } catch (IOException e)
- {
- LOG.error ("cannot open index " + dirs[i], e);
- e.printStackTrace ();
+ public static Searcher[] createSearchers(String indexDirs) throws Exception {
+ String[] dirs = StringUtils.split(indexDirs, " ,");
+ List searchersList = new ArrayList();
+ for (int i = 0; i < dirs.length; i++) {
+ try {
+ IndexReader reader = IndexReader.open(dirs[i]);
+ searchersList.add(new IndexSearcher(reader));
+ } catch (IOException e) {
+ LOG.error("cannot open index " + dirs[i], e);
+ e.printStackTrace();
}
}
- return (Searcher[]) searchersList.toArray (new Searcher[searchersList.size ()]);
+ return (Searcher[]) searchersList.toArray(new Searcher[searchersList.size()]);
}
/**
* Opens the searchers for the configured index Dirs after closing any
* already existing ones.
*/
- protected synchronized void openSearchers ()
- {
- try
- {
- closeSearchers (this.searchers);
- this.searchers = createSearchers (indexDirs);
- } catch (Exception e1)
- {
- LOG.error ("error opening searchers for index dirs " + config.getProperty (PROP_INDEX_DIR), e1);
- throw new RuntimeException ("error opening searchers for index dirs "
- + config.getProperty (PROP_INDEX_DIR), e1);
+ protected synchronized void openSearchers() {
+ try {
+ closeSearchers(this.searchers);
+ this.searchers = createSearchers(indexDirs);
+ } catch (Exception e1) {
+ LOG.error("error opening searchers for index dirs " + config.getProperty(PROP_INDEX_DIR), e1);
+ throw new RuntimeException("error opening searchers for index dirs "
+ + config.getProperty(PROP_INDEX_DIR), e1);
}
}
/**
* @throws IOException
*/
- protected static void closeSearchers (Searcher[] searchers) throws IOException
- {
- if (searchers != null)
- {
- for (int i = 0; i < searchers.length; i++)
- {
- if (searchers[i] != null) searchers[i].close ();
+ protected static void closeSearchers(Searcher[] searchers) throws IOException {
+ if (searchers != null) {
+ for (int i = 0; i < searchers.length; i++) {
+ if (searchers[i] != null) searchers[i].close();
}
}
}
public long getQueueSize() {
- return indexUpdater.getQueueSize();
+ return indexUpdater.getQueueSize();
}
public void queueDocument(XWikiDocument doc, XWikiContext context) {
Modified: xwiki-apps/gelc/gelcv1/trunk/gelcplugins/src/main/java/net/jkraemer/xwiki/plugins/lucene/ObjectData.java
===================================================================
--- xwiki-apps/gelc/gelcv1/trunk/gelcplugins/src/main/java/net/jkraemer/xwiki/plugins/lucene/ObjectData.java 2006-12-06 15:49:47 UTC (rev 1686)
+++ xwiki-apps/gelc/gelcv1/trunk/gelcplugins/src/main/java/net/jkraemer/xwiki/plugins/lucene/ObjectData.java 2006-12-06 16:50:25 UTC (rev 1687)
@@ -23,10 +23,16 @@
package net.jkraemer.xwiki.plugins.lucene;
import org.apache.log4j.Logger;
+import org.apache.lucene.document.Field;
import com.xpn.xwiki.doc.XWikiDocument;
import com.xpn.xwiki.XWikiContext;
import com.xpn.xwiki.objects.BaseProperty;
import com.xpn.xwiki.objects.BaseObject;
+import com.xpn.xwiki.objects.PropertyInterface;
+import com.xpn.xwiki.objects.classes.BaseClass;
+import com.xpn.xwiki.objects.classes.StaticListClass;
+import com.xpn.xwiki.objects.classes.ListClass;
+import com.xpn.xwiki.objects.classes.ListItem;
import java.util.List;
import java.util.Map;
@@ -57,14 +63,13 @@
/**
* @return a string containing the result of
- * {@link IndexData#getFullText(XWikiDocument, XWikiContext)}plus
+ * {@link IndexData#getFullText(XWikiDocument,XWikiContext)}plus
* the full text content (values of title,category,content and extract ) XWiki.ArticleClass Object, as far as it could be
* extracted.
*/
public String getFullText(XWikiDocument doc, XWikiContext context) {
StringBuffer retval = new StringBuffer(super.getFullText(doc, context));
- String contentText = null;
- contentText = getContentAsText(doc, context);
+ String contentText = getContentAsText(doc, context);
if (contentText != null) {
retval.append(" ").append(contentText).toString();
}
@@ -79,15 +84,15 @@
private String getContentAsText(XWikiDocument doc, XWikiContext context) {
StringBuffer contentText = new StringBuffer();
try {
- LOG.info(doc.getFullName());
- Map objects = doc.getxWikiObjects();
- Iterator itKey = objects.keySet().iterator();
- while(itKey.hasNext()) {
- String className = (String) itKey.next();
- Iterator itObj = doc.getObjects(className).iterator();
- while(itObj.hasNext())
- extractContent(contentText, (BaseObject) itObj.next(), context);
- }
+ LOG.info(doc.getFullName());
+ Map objects = doc.getxWikiObjects();
+ Iterator itKey = objects.keySet().iterator();
+ while (itKey.hasNext()) {
+ String className = (String) itKey.next();
+ Iterator itObj = doc.getObjects(className).iterator();
+ while (itObj.hasNext())
+ extractContent(contentText, (BaseObject) itObj.next(), context);
+ }
} catch (Exception e) {
LOG.error("error getting content from XWiki Objects ", e);
@@ -96,13 +101,13 @@
return contentText.toString();
}
- private void extractContent(StringBuffer contentText, BaseObject baseObject, XWikiContext context){
- try{
- if (baseObject!=null) {
+ private void extractContent(StringBuffer contentText, BaseObject baseObject, XWikiContext context) {
+ try {
+ if (baseObject != null) {
Object[] propertyNames = baseObject.getPropertyNames();
for (int i = 0; i < propertyNames.length; i++) {
BaseProperty baseProperty = (BaseProperty) baseObject.getField((String) propertyNames[i]);
- if ((baseProperty!=null)&&(baseProperty.getValue()!=null)) {
+ if ((baseProperty != null) && (baseProperty.getValue() != null)) {
contentText.append(baseProperty.getValue().toString());
}
contentText.append(" ");
@@ -113,4 +118,92 @@
e.printStackTrace();
}
}
+
+ /**
+  * Adds the data of this index entry to the given lucene document. The superclass
+  * implementation is invoked first; afterwards every property of every XWiki object
+  * attached to <code>doc</code> is handed to <code>indexProperty</code>.
+  * A failure while indexing one property is logged and does not stop the remaining
+  * properties from being indexed.
+  *
+  * @param luceneDoc the lucene document receiving the fields
+  * @param doc the XWiki document whose objects are indexed
+  * @param context the current XWiki context
+  */
+ public void addDataToLuceneDocument(org.apache.lucene.document.Document luceneDoc, XWikiDocument doc,
+         XWikiContext context) {
+
+     super.addDataToLuceneDocument(luceneDoc, doc, context);
+     Map objects = doc.getxWikiObjects();
+     for (Iterator itr = objects.keySet().iterator(); itr.hasNext();) {
+         String className = (String) itr.next();
+         Iterator itObj = doc.getObjects(className).iterator();
+
+         while (itObj.hasNext()) {
+             BaseObject baseObject = (BaseObject) itObj.next();
+             if (baseObject == null) {
+                 // extractContent() guards against null entries coming from the same
+                 // object list; skip them here too instead of failing the whole document
+                 continue;
+             }
+             Object[] propertyNames = baseObject.getPropertyNames();
+             for (int i = 0; i < propertyNames.length; i++) {
+                 try {
+                     indexProperty(luceneDoc, baseObject, (String) propertyNames[i], context);
+                 } catch (Exception e) {
+                     LOG.error("error extracting fulltext for document " + this, e);
+                 }
+             }
+         }
+     }
+ }
+
+ /**
+  * Indexes a single property of an XWiki object. Properties whose class definition is a
+  * {@link StaticListClass} are delegated to <code>indexStaticList</code>; any other
+  * property is stored as one tokenized field named <code>ClassName.propertyName</code>.
+  *
+  * @param luceneDoc the lucene document receiving the field(s)
+  * @param baseObject the object owning the property
+  * @param propertyName name of the property to index
+  * @param context the current XWiki context, used to resolve the object's class
+  */
+ private void indexProperty(org.apache.lucene.document.Document luceneDoc, BaseObject baseObject, String propertyName, XWikiContext context) {
+     final String qualifiedName = baseObject.getClassName() + "." + propertyName;
+     final BaseClass objectClass = baseObject.getxWikiClass(context);
+     final PropertyInterface classField = objectClass.getField(propertyName);
+
+     if (classField instanceof StaticListClass) {
+         indexStaticList(luceneDoc, baseObject, (StaticListClass) classField, propertyName, context);
+         return;
+     }
+
+     final String text = getContentAsText(baseObject, propertyName);
+     if (text == null) {
+         return;
+     }
+     luceneDoc.add(new Field(qualifiedName, text, Field.Store.YES, Field.Index.TOKENIZED));
+ }
+
+ /**
+  * Indexes a static-list property. For every key selected on the object that is known
+  * to the list definition, the item id goes into <code>ClassName.property.key</code> and
+  * the item value into <code>ClassName.property.value</code>. The plain
+  * <code>ClassName.property</code> field always receives the selected key and, when the
+  * item's value differs from its id, the value as well.
+  *
+  * @param luceneDoc the lucene document receiving the fields
+  * @param baseObject the object owning the property
+  * @param prop the static list definition of the property
+  * @param propertyName name of the property to index
+  * @param context the current XWiki context, used to resolve the list items
+  */
+ private void indexStaticList(org.apache.lucene.document.Document luceneDoc, BaseObject baseObject, StaticListClass prop, String propertyName, XWikiContext context) {
+     final Map knownItems = prop.getMap(context);
+     final List selectedKeys = baseObject.getListValue(propertyName);
+     final String plainField = baseObject.getClassName() + "." + propertyName;
+     final String keyField = plainField + ".key";
+     final String valueField = plainField + ".value";
+
+     for (Iterator keyIt = selectedKeys.iterator(); keyIt.hasNext();) {
+         final String selectedKey = (String) keyIt.next();
+         final ListItem listItem = (ListItem) knownItems.get(selectedKey);
+         if (listItem != null) {
+             // the key of the list entry
+             luceneDoc.add(new Field(keyField, listItem.getId(), Field.Store.YES, Field.Index.TOKENIZED));
+             // the value of the list entry
+             luceneDoc.add(new Field(valueField, listItem.getValue(), Field.Store.YES, Field.Index.TOKENIZED));
+             // the plain field gets the value too, but only when it differs from the id
+             final boolean sameAsId = listItem.getId().equals(listItem.getValue());
+             if (!sameAsId) {
+                 luceneDoc.add(new Field(plainField, listItem.getValue(), Field.Store.YES, Field.Index.TOKENIZED));
+             }
+         }
+         // the selected key itself is always indexed under the plain field
+         luceneDoc.add(new Field(plainField, selectedKey, Field.Store.YES, Field.Index.TOKENIZED));
+     }
+ }
+
+ /**
+  * Returns the textual content of one property of the given object.
+  * <code>doc</code> and <code>context</code> are accepted but not used by this method.
+  *
+  * @param doc the document the object belongs to (unused)
+  * @param baseObject the object owning the property
+  * @param property name of the property to read
+  * @param context the current XWiki context (unused)
+  * @return the property value as text
+  */
+ public String getFullText(XWikiDocument doc, BaseObject baseObject, String property, XWikiContext context) {
+     final String propertyText = getContentAsText(baseObject, property);
+     return propertyText;
+ }
+
+ /**
+  * Reads one property of an XWiki object and returns its value as text.
+  *
+  * @param baseObject the object owning the property
+  * @param property name of the property to read
+  * @return the string form of the property value; an empty string when the property
+  *         is missing, has no value, or could not be read (never <code>null</code>)
+  */
+ private String getContentAsText(BaseObject baseObject, String property) {
+
+     StringBuffer contentText = new StringBuffer();
+     try {
+         BaseProperty baseProperty = (BaseProperty) baseObject.getField(property);
+         // a missing property is not an error: guard it the same way extractContent()
+         // does, instead of relying on the catch block to absorb a NullPointerException
+         if ((baseProperty != null) && (baseProperty.getValue() != null)) {
+             contentText.append(baseProperty.getValue().toString());
+         }
+     } catch (Exception e) {
+         // the logger already records the stack trace, no separate printStackTrace()
+         LOG.error("error getting content from XWiki Objects ", e);
+     }
+     return contentText.toString();
+ }
+
+
}
Modified: xwiki-apps/gelc/gelcv1/trunk/gelcplugins/src/main/java/net/jkraemer/xwiki/plugins/lucene/TextExtractor.java
===================================================================
--- xwiki-apps/gelc/gelcv1/trunk/gelcplugins/src/main/java/net/jkraemer/xwiki/plugins/lucene/TextExtractor.java 2006-12-06 15:49:47 UTC (rev 1686)
+++ xwiki-apps/gelc/gelcv1/trunk/gelcplugins/src/main/java/net/jkraemer/xwiki/plugins/lucene/TextExtractor.java 2006-12-06 16:50:25 UTC (rev 1687)
@@ -49,7 +49,7 @@
textExtractors.put ("application/pdf", new PDFTextExtractor ());
// textExtractors.put ("application/vnd.sun.xml.writer", new OpenOfficeTextExtractor ());
textExtractors.put ("application/msword", new MSWordTextExtractor ());
- textExtractors.put ("application/ms-powerpoint", new MSPowerPointTextExtractor ());
+ textExtractors.put ("application/ms-powerpoint", new MSPowerPointTextExtractor());
textExtractors.put ("application/ms-excel", new MSExcelTextExtractor ());
}
More information about the Xwiki-notifications
mailing list