r1690 - in xwiki/trunk/core/src/main/java/com/xpn/xwiki/plugin: . lucene lucene/textextraction lucene/textextraction/xmlutil

Jeremi Joslin jeremi at users.forge.objectweb.org
Wed Dec 6 23:29:03 CET 2006


Author: jeremi
Date: 2006-12-06 23:29:02 +0100 (Wed, 06 Dec 2006)
New Revision: 1690

Added:
   xwiki/trunk/core/src/main/java/com/xpn/xwiki/plugin/lucene/
   xwiki/trunk/core/src/main/java/com/xpn/xwiki/plugin/lucene/AttachmentData.java
   xwiki/trunk/core/src/main/java/com/xpn/xwiki/plugin/lucene/DocumentData.java
   xwiki/trunk/core/src/main/java/com/xpn/xwiki/plugin/lucene/IndexData.java
   xwiki/trunk/core/src/main/java/com/xpn/xwiki/plugin/lucene/IndexFields.java
   xwiki/trunk/core/src/main/java/com/xpn/xwiki/plugin/lucene/IndexRebuilder.java
   xwiki/trunk/core/src/main/java/com/xpn/xwiki/plugin/lucene/IndexUpdater.java
   xwiki/trunk/core/src/main/java/com/xpn/xwiki/plugin/lucene/LucenePlugin.java
   xwiki/trunk/core/src/main/java/com/xpn/xwiki/plugin/lucene/LucenePluginApi.java
   xwiki/trunk/core/src/main/java/com/xpn/xwiki/plugin/lucene/ObjectData.java
   xwiki/trunk/core/src/main/java/com/xpn/xwiki/plugin/lucene/SearchResult.java
   xwiki/trunk/core/src/main/java/com/xpn/xwiki/plugin/lucene/SearchResults.java
   xwiki/trunk/core/src/main/java/com/xpn/xwiki/plugin/lucene/TextExtractor.java
   xwiki/trunk/core/src/main/java/com/xpn/xwiki/plugin/lucene/XWikiDocumentQueue.java
   xwiki/trunk/core/src/main/java/com/xpn/xwiki/plugin/lucene/textextraction/
   xwiki/trunk/core/src/main/java/com/xpn/xwiki/plugin/lucene/textextraction/MSExcelTextExtractor.java
   xwiki/trunk/core/src/main/java/com/xpn/xwiki/plugin/lucene/textextraction/MSPowerPointTextExtractor.java
   xwiki/trunk/core/src/main/java/com/xpn/xwiki/plugin/lucene/textextraction/MSWordTextExtractor.java
   xwiki/trunk/core/src/main/java/com/xpn/xwiki/plugin/lucene/textextraction/MimetypeTextExtractor.java
   xwiki/trunk/core/src/main/java/com/xpn/xwiki/plugin/lucene/textextraction/OpenOfficeTextExtractor.java
   xwiki/trunk/core/src/main/java/com/xpn/xwiki/plugin/lucene/textextraction/PDFTextExtractor.java
   xwiki/trunk/core/src/main/java/com/xpn/xwiki/plugin/lucene/textextraction/PlainTextExtractor.java
   xwiki/trunk/core/src/main/java/com/xpn/xwiki/plugin/lucene/textextraction/XmlTextExtractor.java
   xwiki/trunk/core/src/main/java/com/xpn/xwiki/plugin/lucene/textextraction/xmlutil/
   xwiki/trunk/core/src/main/java/com/xpn/xwiki/plugin/lucene/textextraction/xmlutil/XmlEncodingDetector.java
Log:
move the lucene plugin to the core of xwiki

Added: xwiki/trunk/core/src/main/java/com/xpn/xwiki/plugin/lucene/AttachmentData.java
===================================================================
--- xwiki/trunk/core/src/main/java/com/xpn/xwiki/plugin/lucene/AttachmentData.java	2006-12-06 21:50:31 UTC (rev 1689)
+++ xwiki/trunk/core/src/main/java/com/xpn/xwiki/plugin/lucene/AttachmentData.java	2006-12-06 22:29:02 UTC (rev 1690)
@@ -0,0 +1,179 @@
+/*
+ * 
+ * ===================================================================
+ *
+ * Copyright (c) 2005 Jens Krämer, All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details, published at
+ * http://www.gnu.org/copyleft/gpl.html or in gpl.txt in the
+ * root folder of this distribution.
+ *
+ * Created on 25.01.2005
+ *
+ */
+package com.xpn.xwiki.plugin.lucene;
+
+import java.util.HashMap;
+import java.util.Map;
+
+import org.apache.log4j.Logger;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+
+import com.xpn.xwiki.XWikiContext;
+import com.xpn.xwiki.doc.XWikiAttachment;
+import com.xpn.xwiki.doc.XWikiDocument;
+
+/**
+ * Holds all data but the content of an attachment to be indexed. The content is
+ * retrieved at indexing time, which should save us some memory especially when
+ * rebuilding an index for a big wiki.
+ * @author <a href="mailto:jk at jkraemer.net">Jens Krämer </a>
+ */
+public class AttachmentData extends IndexData
+{
+    /**
+     * Mapping from common file name endings to mime types. This is used as a
+     * fallback when text extraction by using the mime type delivered by xwiki
+     * doesn't work.
+     */
+    static final Map            MIMETYPES = new HashMap ();
+    static
+    {
+        MIMETYPES.put ("pdf", "application/pdf");
+        MIMETYPES.put ("doc", "application/msword");
+        MIMETYPES.put ("sxw", "application/vnd.sun.xml.writer");
+        MIMETYPES.put ("xml", "text/xml");
+        MIMETYPES.put ("txt", "text/plain");
+        MIMETYPES.put ("ppt", "application/ms-powerpoint");
+        MIMETYPES.put ("xls", "application/ms-excel");
+    }
+
+    private static final Logger LOG       = Logger.getLogger (AttachmentData.class);
+    private int                 size;
+    private String              filename;
+
+    /**
+     * Records modification date, author, size and filename of the given attachment.
+     * The document parameter is not read: the superclass gets attachment.getDoc () and the context.
+     */
+    public AttachmentData (final XWikiDocument document, final XWikiAttachment attachment,
+                           final XWikiContext context)
+    {
+        super (attachment.getDoc (), context);
+        setModificationDate (attachment.getDate ());
+        setAuthor (attachment.getAuthor ());
+        setSize (attachment.getFilesize ());
+        setFilename (attachment.getFilename ());
+    }
+
+    /**
+     * Adds the fields written by {@link IndexData} and, when a filename is set, a stored and tokenized FILENAME field.
+     */
+    public void addDataToLuceneDocument (Document luceneDoc, XWikiDocument doc, XWikiContext context)
+    {
+        super.addDataToLuceneDocument (luceneDoc, doc, context);
+        if (filename != null) luceneDoc.add (new Field (IndexFields.FILENAME, filename, Field.Store.YES, Field.Index.TOKENIZED));
+    }
+
+    /**
+     * @param size
+     *            The size to set.
+     */
+    public void setSize (int size)
+    {
+        this.size = size;
+    }
+
+    /**
+     * @return the document type constant LucenePlugin.DOCTYPE_ATTACHMENT
+     */
+    public String getType ()
+    {
+        return LucenePlugin.DOCTYPE_ATTACHMENT;
+    }
+
+    /**
+     * @return Returns the filename.
+     */
+    public String getFilename ()
+    {
+        return filename;
+    }
+
+    /**
+     * @param filename
+     *            The filename to set.
+     */
+    public void setFilename (String filename)
+    {
+        this.filename = filename;
+    }
+
+    /**
+     * Overridden to append the filename to the id of the owning document.
+     * @return the id built by the superclass, followed by "." and the filename
+     */
+    public String getId ()
+    {
+        return super.getId () + "." + filename;
+    }
+
+    /**
+     * @return the text produced by
+     *         {@link IndexData#getFullText(XWikiDocument, XWikiContext)},
+     *         followed by a space and the text extracted from this
+     *         attachment whenever the extraction yielded any.
+     */
+    public String getFullText (XWikiDocument doc, XWikiContext context)
+    {
+        final StringBuffer text = new StringBuffer (super.getFullText (doc, context));
+        final String extracted = getContentAsText (doc, context);
+        if (extracted == null)
+        {
+            // nothing could be extracted: the metadata text is all there is
+            return text.toString ();
+        }
+        return text.append (" ").append (extracted).toString ();
+    }
+
+    /**
+     * Extracts this attachment's text, choosing the mime type by file name extension from {@link #MIMETYPES}.
+     * @param doc document the attachment is fetched from
+     * @param context context used to read the attachment content
+     * @return the extracted text, or null when nothing was extracted or an error occurred
+     */
+    private String getContentAsText (XWikiDocument doc, XWikiContext context)
+    {
+        String contentText = null;
+        try
+        {
+            XWikiAttachment att = doc.getAttachment (filename);
+            if (LOG.isDebugEnabled ()) LOG.debug ("have attachment for filename " + filename + ": " + att);
+            // without an attachment or a file name there is nothing to extract from
+            if (att != null && filename != null)
+            {
+                String[] nameParts = filename.split ("\\.");
+                if (nameParts.length > 1)
+                {
+                    // the content is only loaded once it is clear that an extraction is attempted
+                    byte[] content = att.getContent (context);
+                    contentText = TextExtractor.getText (content, (String) MIMETYPES
+                            .get (nameParts[nameParts.length - 1].toLowerCase ()));
+                }
+            }
+        } catch (Exception e)
+        {
+            LOG.error ("error getting content of attachment", e);
+        }
+        return contentText;
+    }
+
+}


Property changes on: xwiki/trunk/core/src/main/java/com/xpn/xwiki/plugin/lucene/AttachmentData.java
___________________________________________________________________
Name: svn:eol-style
   + native

Added: xwiki/trunk/core/src/main/java/com/xpn/xwiki/plugin/lucene/DocumentData.java
===================================================================
--- xwiki/trunk/core/src/main/java/com/xpn/xwiki/plugin/lucene/DocumentData.java	2006-12-06 21:50:31 UTC (rev 1689)
+++ xwiki/trunk/core/src/main/java/com/xpn/xwiki/plugin/lucene/DocumentData.java	2006-12-06 22:29:02 UTC (rev 1690)
@@ -0,0 +1,70 @@
+/*
+ * 
+ * ===================================================================
+ *
+ * Copyright (c) 2005 Jens Krämer, All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details, published at
+ * http://www.gnu.org/copyleft/gpl.html or in gpl.txt in the
+ * root folder of this distribution.
+ *
+ * Created on 25.01.2005
+ *
+ */
+
+package com.xpn.xwiki.plugin.lucene;
+
+import org.apache.log4j.Logger;
+
+import com.xpn.xwiki.XWikiContext;
+import com.xpn.xwiki.XWikiException;
+import com.xpn.xwiki.doc.XWikiDocument;
+
+/**
+ * Holds all data but the content of a wiki page to be indexed. The content is
+ * retrieved at indexing time, which should save us some memory especially when
+ * rebuilding an index for a big wiki.
+ * @author <a href="mailto:jk at jkraemer.net">Jens Krämer </a>
+ */
+public class DocumentData extends IndexData
+{
+    private static final Logger LOG = Logger.getLogger (DocumentData.class);
+
+    public DocumentData (final XWikiDocument doc, final XWikiContext context)
+    {
+        super (doc, context);
+        setAuthor (doc.getAuthor ());
+        setCreator (doc.getCreator ());
+        setModificationDate (doc.getDate ());
+        setCreationDate (doc.getCreationDate ());
+    }
+
+    /**
+     * @return the document type constant LucenePlugin.DOCTYPE_WIKIPAGE
+     */
+    public String getType ()
+    {
+        return LucenePlugin.DOCTYPE_WIKIPAGE;
+    }
+
+    /**
+     * @return a string containing the result of
+     *         {@link IndexData#getFullText(XWikiDocument, XWikiContext)}
+     *         followed by a space and the content of the given document, as
+     *         returned by doc.getContent ()
+     */
+    public String getFullText (XWikiDocument doc, XWikiContext context)
+    {
+        return new StringBuffer (super.getFullText (doc, context)).append (" ").append (doc.getContent ())
+                .toString ();
+    }
+
+}


Property changes on: xwiki/trunk/core/src/main/java/com/xpn/xwiki/plugin/lucene/DocumentData.java
___________________________________________________________________
Name: svn:eol-style
   + native

Added: xwiki/trunk/core/src/main/java/com/xpn/xwiki/plugin/lucene/IndexData.java
===================================================================
--- xwiki/trunk/core/src/main/java/com/xpn/xwiki/plugin/lucene/IndexData.java	2006-12-06 21:50:31 UTC (rev 1689)
+++ xwiki/trunk/core/src/main/java/com/xpn/xwiki/plugin/lucene/IndexData.java	2006-12-06 22:29:02 UTC (rev 1690)
@@ -0,0 +1,273 @@
+/*
+ * 
+ * ===================================================================
+ *
+ * Copyright (c) 2005 Jens Krämer, All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details, published at
+ * http://www.gnu.org/copyleft/gpl.html or in gpl.txt in the
+ * root folder of this distribution.
+ *
+ * Created on 25.01.2005
+ *
+ */
+
+package com.xpn.xwiki.plugin.lucene;
+
+import java.util.Date;
+
+import org.apache.commons.lang.time.FastDateFormat;
+import org.apache.log4j.Logger;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.search.BooleanQuery;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.TermQuery;
+
+import com.xpn.xwiki.XWikiContext;
+import com.xpn.xwiki.doc.XWikiDocument;
+
+/**
+ * @author <a href="mailto:jk at jkraemer.net">Jens Krämer </a>
+ */
+public abstract class IndexData
+{
+    private static final Logger LOG = Logger.getLogger (IndexData.class);
+
+    private String              documentName;
+    private String              documentWeb;
+    private String              fullName;
+    private String              author;
+    private Date                creationDate;
+    private String              creator;
+    private String              language;
+    private Date                modificationDate;
+    /** name of the virtual wiki this doc belongs to */
+    private String              wiki;
+
+    public IndexData (final XWikiDocument doc, final XWikiContext context)
+    {
+        setDocumentName (doc.getName ());
+        setDocumentWeb (doc.getWeb ());
+        setWiki (context.getDatabase ());
+        // a context without database must not fail here: getId () tolerates a null wiki as well
+        setFullName ((wiki == null ? "" : wiki + ":") + documentWeb + "." + documentName);
+        setLanguage (doc.getLanguage ());
+    }
+
+    /**
+     * Adds this document's data to a lucene Document instance for indexing.
+     * <p>
+     * <strong>Short introduction to Lucene field types </strong>
+     * </p>
+     * <p>
+     * Which type of Lucene field is used determines what Lucene does with data
+     * and how we can use it for searching and showing search results:
+     * </p>
+     * <ul>
+     * <li>Keyword fields don't get tokenized, but are searchable and stored in
+     * the index. This is perfect for fields you want to search in
+     * programmatically (like ids and such), and date fields. Since all
+     * user-entered queries are tokenized, letting the user search these fields
+     * makes almost no sense, except for queries on date fields, where
+     * tokenization is useless.</li>
+     * <li>the stored text fields are used for short texts which should be
+     * searchable by the user, and stored in the index for reconstruction.
+     * Perfect for document names, titles, abstracts.</li>
+     * <li>the unstored field takes the biggest part of the content - the full
+     * text. It is tokenized and indexed, but not stored in the index. This
+     * makes sense, since when the user wants to see the full content, he clicks
+     * the link to view the full version of a document, which is then delivered
+     * by xwiki.</li>
+     * </ul>
+     * @param luceneDoc
+     *            the Lucene document the fields are added to
+     * @param doc the wiki document, handed on to getFullText
+     * @param context the context, handed on to getFullText
+     */
+    public void addDataToLuceneDocument (org.apache.lucene.document.Document luceneDoc, XWikiDocument doc,
+                                         XWikiContext context)
+    {
+        // Keyword field: stored and indexed as one single term, so that the TermQuery of buildQuery () can match it
+        luceneDoc.add (new Field(IndexFields.DOCUMENT_ID, getId(), Field.Store.YES, Field.Index.UN_TOKENIZED));
+        // language, wiki and type are stored and indexed tokenized
+        luceneDoc.add (new Field(IndexFields.DOCUMENT_LANGUAGE, this.language, Field.Store.YES, Field.Index.TOKENIZED));
+        if (wiki != null && wiki.length () > 0) luceneDoc.add (new Field (IndexFields.DOCUMENT_WIKI, wiki, Field.Store.YES, Field.Index.TOKENIZED));
+        if (getType () != null) luceneDoc.add (new Field (IndexFields.DOCUMENT_TYPE, getType (), Field.Store.YES, Field.Index.TOKENIZED));
+        if (modificationDate != null) // dates are indexed as single terms so that date queries can match them
+            luceneDoc.add (new Field(IndexFields.DOCUMENT_DATE, IndexFields
+                    .dateToString (modificationDate), Field.Store.YES, Field.Index.UN_TOKENIZED));
+        if (creationDate != null)
+            luceneDoc.add (new Field(IndexFields.DOCUMENT_CREATIONDATE, IndexFields
+                    .dateToString (creationDate), Field.Store.YES, Field.Index.UN_TOKENIZED));
+
+        // stored Text fields: tokenized and indexed
+        luceneDoc.add (new Field(IndexFields.DOCUMENT_NAME, documentName, Field.Store.YES, Field.Index.TOKENIZED));
+        luceneDoc.add (new Field(IndexFields.DOCUMENT_WEB, documentWeb, Field.Store.YES, Field.Index.TOKENIZED));
+        if (author != null) luceneDoc.add (new Field(IndexFields.DOCUMENT_AUTHOR, author, Field.Store.YES, Field.Index.TOKENIZED));
+        if (creator != null) luceneDoc.add (new Field(IndexFields.DOCUMENT_CREATOR, creator, Field.Store.YES, Field.Index.TOKENIZED));
+
+        // UnStored fields: tokenized and indexed, but no reconstruction of
+        // original content will be possible from the search result
+        try
+        {
+            final String ft = getFullText (doc, context);
+            if (ft != null) luceneDoc.add (new Field(IndexFields.FULLTEXT, ft, Field.Store.NO, Field.Index.TOKENIZED));
+        } catch (Exception e)
+        {
+            LOG.error ("error extracting fulltext for document " + this, e);
+        }
+    }
+
+    /**
+     * Builds a Lucene query matching only the document this instance
+     * represents. This is used for removing old versions of a document from the
+     * index before adding a new one.
+     * @return a query matching the field DOCUMENT_ID to the value of #getId()
+     */
+    public Query buildQuery ()
+    {
+        return new TermQuery (new Term (IndexFields.DOCUMENT_ID, getId ()));
+    }
+
+    /**
+     * @return string unique to this document across all languages and virtual
+     *         wikis
+     */
+    public String getId ()
+    {
+        StringBuffer retval = new StringBuffer ();
+        if (wiki != null && wiki.length () > 0) retval.append (wiki).append (":");
+        retval.append (documentWeb).append (".");
+        retval.append (documentName).append (".");
+        retval.append (language);
+        return retval.toString ();
+    }
+
+    /**
+     * @return documentName and documentWeb, plus author and creator when set, separated by single spaces
+     */
+    public String getFullText (XWikiDocument doc, XWikiContext context)
+    {
+        StringBuffer sb = new StringBuffer (documentName).append (" ").append (documentWeb);
+        if (author != null) sb.append (" ").append (author); // unset values are skipped, not indexed as "null"
+        return (creator != null ? sb.append (" ").append (creator) : sb).toString ();
+    }
+
+    public abstract String getType ();
+
+    public String toString ()
+    {
+        return getId ();
+    }
+
+    /**
+     * @param author
+     *            The author to set.
+     */
+    public void setAuthor (String author)
+    {
+        this.author = author;
+    }
+
+    /**
+     * @param documentName
+     *            The documentName to set.
+     */
+    public void setDocumentName (String documentName)
+    {
+        this.documentName = documentName;
+    }
+
+    /**
+     * @param documentWeb
+     *            The documentWeb to set.
+     */
+    public void setDocumentWeb (String documentWeb)
+    {
+        this.documentWeb = documentWeb;
+    }
+
+    /**
+     * @param modificationDate
+     *            The modificationDate to set.
+     */
+    public void setModificationDate (Date modificationDate)
+    {
+        this.modificationDate = modificationDate;
+    }
+
+    public String getDocumentName ()
+    {
+        return documentName;
+    }
+
+    public String getDocumentWeb ()
+    {
+        return documentWeb;
+    }
+
+    public String getWiki ()
+    {
+        return wiki;
+    }
+
+    public void setWiki (String wiki)
+    {
+        this.wiki = wiki;
+    }
+
+    public Date getCreationDate ()
+    {
+        return creationDate;
+    }
+
+    public void setCreationDate (Date creationDate)
+    {
+        this.creationDate = creationDate;
+    }
+
+    public String getCreator ()
+    {
+        return creator;
+    }
+
+    public void setCreator (String creator)
+    {
+        this.creator = creator;
+    }
+
+    /**
+     * @return
+     */
+    public String getFullName ()
+    {
+        return fullName;
+    }
+
+    public void setFullName (String fullName)
+    {
+        this.fullName = fullName;
+    }
+
+    public String getLanguage ()
+    {
+        return language;
+    }
+
+    public void setLanguage (String lang)
+    {
+        if (lang != null && lang.length () > 0)
+            this.language = lang;
+        else
+            this.language = "default";
+    }
+}


Property changes on: xwiki/trunk/core/src/main/java/com/xpn/xwiki/plugin/lucene/IndexData.java
___________________________________________________________________
Name: svn:eol-style
   + native

Added: xwiki/trunk/core/src/main/java/com/xpn/xwiki/plugin/lucene/IndexFields.java
===================================================================
--- xwiki/trunk/core/src/main/java/com/xpn/xwiki/plugin/lucene/IndexFields.java	2006-12-06 21:50:31 UTC (rev 1689)
+++ xwiki/trunk/core/src/main/java/com/xpn/xwiki/plugin/lucene/IndexFields.java	2006-12-06 22:29:02 UTC (rev 1690)
@@ -0,0 +1,112 @@
+/*
+ * 
+ * ===================================================================
+ *
+ * Copyright (c) 2005 Jens Krämer, All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details, published at
+ * http://www.gnu.org/copyleft/gpl.html or in gpl.txt in the
+ * root folder of this distribution.
+ *
+ * Created on 28.01.2005
+ *
+ */
+package com.xpn.xwiki.plugin.lucene;
+
+import java.text.SimpleDateFormat;
+import java.util.Date;
+
+import org.apache.commons.lang.time.FastDateFormat;
+import org.apache.log4j.Logger;
+
+/**
+ * Contains constants naming the Lucene index fields used by this Plugin and
+ * some helper methods for proper handling of special field values like dates.
+ * @author <a href="mailto:jk at jkraemer.net">Jens Krämer </a>
+ */
+public abstract class IndexFields
+{
+
+    /**
+     * Keyword field, holds a string uniquely identifying a document across the
+     * index. this is used for finding old versions of a document to be indexed.
+     */
+    public static final String          DOCUMENT_ID           = "_docid";
+    /** Keyword field, holds the name of the virtual wiki a document belongs to */
+    public static final String          DOCUMENT_WIKI         = "wiki";
+    /** Name of the document */
+    public static final String          DOCUMENT_NAME         = "name";
+    /** Name of the web the document belongs to */
+    public static final String          DOCUMENT_WEB          = "web";
+    /** Language of the document */
+    public static final String          DOCUMENT_LANGUAGE     = "lang";
+    /**
+     * Type of a document, "attachment" or "wikipage", used to control
+     * presentation of searchresults. See {@link SearchResult}and
+     * xdocs/searchResult.vm.
+     */
+    public static final String          DOCUMENT_TYPE         = "type";
+
+    /** Filename, only used for attachments */
+    public static final String          FILENAME              = "filename";
+    /** Last modifier */
+    public static final String          DOCUMENT_AUTHOR       = "author";
+    /** Creator of the document */
+    public static final String          DOCUMENT_CREATOR      = "creator";
+    /** Date of last modification */
+    public static final String          DOCUMENT_DATE         = "date";
+    /** Date of creation */
+    public static final String          DOCUMENT_CREATIONDATE = "creationdate";
+    /**
+     * Fulltext content, not stored (and can therefore not be restored from the
+     * index).
+     */
+    public static final String          FULLTEXT              = "ft";
+    /** not in use */
+    public static final String          KEYWORDS              = "kw";
+    /**
+     * Format for date storage in the index, and therefore the format which has
+     * to be used for date-queries.
+     */
+    public static final String          DATE_FORMAT           = "yyyyMMddHHmm";
+
+    private static final FastDateFormat df                    = FastDateFormat
+                                                                      .getInstance (IndexFields.DATE_FORMAT);
+    private static final Logger         LOG                   = Logger.getLogger (IndexFields.class);
+
+    public static final String dateToString (Date date)
+    {
+        return df.format (date);
+    }
+
+    public static final Date stringToDate (String dateValue)
+    {
+        try
+        {
+            return new SimpleDateFormat (DATE_FORMAT).parse (dateValue);
+        } catch (Exception e)
+        {
+            // best effort: callers still get null, the reason is made visible at debug level only
+            if (LOG.isDebugEnabled ()) LOG.debug ("cannot parse index date: " + dateValue, e);
+        }
+        return null;
+    }
+
+    /**
+     * 
+     */
+    private IndexFields ()
+    {
+        super ();
+        // TODO Auto-generated constructor stub
+    }
+
+}


Property changes on: xwiki/trunk/core/src/main/java/com/xpn/xwiki/plugin/lucene/IndexFields.java
___________________________________________________________________
Name: svn:eol-style
   + native

Added: xwiki/trunk/core/src/main/java/com/xpn/xwiki/plugin/lucene/IndexRebuilder.java
===================================================================
--- xwiki/trunk/core/src/main/java/com/xpn/xwiki/plugin/lucene/IndexRebuilder.java	2006-12-06 21:50:31 UTC (rev 1689)
+++ xwiki/trunk/core/src/main/java/com/xpn/xwiki/plugin/lucene/IndexRebuilder.java	2006-12-06 22:29:02 UTC (rev 1690)
@@ -0,0 +1,231 @@
+/*
+ * 
+ * ===================================================================
+ *
+ * Copyright (c) 2005 Jens Krämer, All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details, published at
+ * http://www.gnu.org/copyleft/gpl.html or in gpl.txt in the
+ * root folder of this distribution.
+ *
+ * Created on 01.02.2005
+ *
+ */
+package com.xpn.xwiki.plugin.lucene;
+
+import java.util.*;
+
+import org.apache.log4j.Logger;
+
+import com.xpn.xwiki.XWikiContext;
+import com.xpn.xwiki.XWikiException;
+import com.xpn.xwiki.objects.BaseObject;
+import com.xpn.xwiki.api.XWiki;
+import com.xpn.xwiki.doc.XWikiAttachment;
+import com.xpn.xwiki.doc.XWikiDocument;
+
+/**
+ * Handles rebuilding of the whole Index. This involves the following steps:
+ * <ul>
+ * <li>empty the existing index</li>
+ * <li>retrieve the names of all virtual wikis</li>
+ * <li>get and index all documents for each virtual wiki</li>
+ * <li>get and index all translations of each document</li>
+ * <li>get and index all attachments of each document</li>
+ * </ul>
+ * The indexing of all content fetched from the wiki is triggered by handing the
+ * data to the indexUpdater thread.
+ *
+ * @author <a href="mailto:jk at jkraemer.net">Jens Krämer </a>
+ */
+public class IndexRebuilder {
+    private IndexUpdater indexUpdater;
+    private static final Logger LOG = Logger.getLogger(IndexRebuilder.class);
+
+    /**
+     * First empties the index, then fetches all Documents, their translations
+     * and their attachments for re-addition to the index. In virtual mode every
+     * wiki found by findWikiServers is read, otherwise only the context's wiki.
+     *
+     * @param wiki used to test for virtual mode and to find the wiki servers
+     * @param context supplies the XWiki instance and the current database name
+     * @return total number of items successfully added to the indexer queue,
+     *         -1 when errors occurred in at least one wiki.
+     * @todo TODO: give more detailed results
+     */
+    public int rebuildIndex(com.xpn.xwiki.api.XWiki wiki, XWikiContext context) {
+        indexUpdater.cleanIndex();
+        int retval = 0;
+        Collection wikiServers;
+        com.xpn.xwiki.XWiki xwiki = context.getWiki();
+        if (wiki.isVirtual()) {
+            wikiServers = findWikiServers(wiki, context);
+            if (LOG.isDebugEnabled()) {
+                LOG.debug("found " + wikiServers.size() + " virtual wikis:");
+                for (Iterator iter = wikiServers.iterator(); iter.hasNext();) {
+                    LOG.debug(iter.next());
+                }
+            }
+        } else {
+            // no virtual wiki configuration, just index the wiki the context
+            // belongs to
+            wikiServers = new ArrayList();
+            wikiServers.add(context.getDatabase());
+        }
+        // Iterate all found virtual wikis; a single failed wiki (-1) makes the total -1
+        for (Iterator iter = wikiServers.iterator(); iter.hasNext();) {
+            int wikiResult = indexWiki(xwiki, (String) iter.next());
+            retval = (retval == -1 || wikiResult == -1) ? -1 : retval + wikiResult;
+        }
+        return retval;
+    }
+
+    /**
+     * Adds the content of a given wiki to the indexUpdater's queue.
+     *
+     * @param xwiki the XWiki instance the documents are loaded from
+     * @param wikiName name of the wiki, set as database on a fresh context
+     * @return number of items handed to the indexUpdater, or -1 when the
+     *         document names could not be retrieved
+     */
+    protected int indexWiki(com.xpn.xwiki.XWiki xwiki, String wikiName) {
+        LOG.info("reading content of wiki " + wikiName);
+        int retval = 0;
+        XWikiContext wikiContext = new XWikiContext();
+        wikiContext.setWiki(xwiki);
+        wikiContext.setDatabase(wikiName);
+        Collection docNames = null;
+        try {
+            docNames = xwiki.getStore().searchDocumentsNames("", wikiContext);
+        } catch (XWikiException e1) {
+            // without the list of names nothing of this wiki can be indexed
+            LOG.error("error getting document names for wiki " + wikiName, e1);
+            return -1;
+        }
+        for (Iterator iterator = docNames.iterator(); iterator.hasNext();) {
+            String docName = (String) iterator.next();
+            XWikiDocument document;
+            try {
+                document = xwiki.getDocument(docName, wikiContext);
+            } catch (XWikiException e2) {
+                // skip this document, the remaining ones are still indexed
+                LOG.error("error fetching document " + wikiName + ":" + docName, e2);
+                continue;
+            }
+            if (document != null) {
+                indexUpdater.add(document, wikiContext);
+                retval++;
+                retval += addTranslationsOfDocument(document, wikiContext);
+                retval += addAttachmentsOfDocument(document, wikiContext);
+                retval += addObjectsOfDocument(document, wikiContext);
+            } else {
+                LOG.info("XWiki delivered null for document name " + wikiName + ":" + docName);
+            }
+        }
+        return retval;
+    }
+
+    /**
+     * Getting the content(values of title/category/content/extract properties ) from the XWiki.ArticleClass objects
+     * @param document
+     * @param wikiContext
+     */
+    private int addObjectsOfDocument(XWikiDocument document, XWikiContext wikiContext) {
+        int retval = 0;
+        Map xwikiObjects = document.getxWikiObjects();
+        if (document.hasElement(XWikiDocument.HAS_OBJECTS)) {
+            retval += xwikiObjects.size();
+            indexUpdater.addObject(document, wikiContext);
+        }
+        return retval;
+    }
+
+    /**
+     * Hands every attachment of a document to the indexUpdater.
+     *
+     * @param document the document whose attachments are to be indexed
+     * @param wikiContext context of the wiki the document belongs to
+     * @return the number of attachments in the document's attachment list,
+     *         including any whose queuing failed
+     */
+    private int addAttachmentsOfDocument(XWikiDocument document, XWikiContext wikiContext) {
+        final List attachments = document.getAttachmentList();
+        final int attachmentCount = attachments.size();
+        Iterator it = attachments.iterator();
+        while (it.hasNext()) {
+            try {
+                indexUpdater.add(document, (XWikiAttachment) it.next(), wikiContext);
+            } catch (Exception e) {
+                // keep going with the remaining attachments
+                LOG.error("error retrieving attachment of document " + document.getFullName(), e);
+            }
+        }
+        return attachmentCount;
+    }
+
+    /**
+     * Queues every translation of a document for indexing.
+     *
+     * @param document the document whose translations are to be indexed
+     * @param wikiContext context of the wiki the document belongs to
+     * @return the number of translations handed to the indexUpdater; 0 if
+     *         the list of translations could not be retrieved
+     */
+    protected int addTranslationsOfDocument(XWikiDocument document, XWikiContext wikiContext) {
+        int retval = 0;
+        List translations;
+        try {
+            translations = document.getTranslationList(wikiContext);
+        } catch (XWikiException e) {
+            LOG.error("error getting list of translations from document " + document.getFullName(), e);
+            return 0;
+        }
+        for (Iterator iter = translations.iterator(); iter.hasNext();) {
+            String lang = (String) iter.next();
+            try {
+                indexUpdater.add(document.getTranslatedDocument(lang, wikiContext), wikiContext);
+                retval++;
+            } catch (XWikiException e1) {
+                // log the cause as well; one failing translation does not stop the rest
+                LOG.error("error getting translated document for document " + document.getFullName()
+                        + " and language " + lang, e1);
+            }
+        }
+        return retval;
+    }
+
+    /**
+     * Looks up the names of the virtual wikis by searching for documents that
+     * carry an XWiki.XWikiServerClass object with a 'server' property.
+     *
+     * @param wiki the wiki API object giving access to the store
+     * @param context context the search is executed in
+     * @return the lower-cased wiki names, taken from the part of each document
+     *         name that follows "XWiki.XWikiServer"; empty if the search
+     *         failed or found nothing
+     */
+    private Collection findWikiServers(XWiki wiki, XWikiContext context) {
+        List retval = new ArrayList();
+        final String hql = ", BaseObject as obj, StringProperty as prop "
+                + "where doc.fullName=obj.name and obj.className='XWiki.XWikiServerClass'"
+                + " and prop.id.id = obj.id " + "and prop.id.name = 'server'";
+        List result = null;
+        try {
+            result = wiki.getXWiki().getStore().searchDocumentsNames(hql, context);
+        } catch (Exception e) {
+            // best effort: an empty list is returned, but the cause is now logged
+            LOG.error("error getting list of wiki servers!", e);
+        }
+        if (result != null) {
+            for (Iterator iter = result.iterator(); iter.hasNext();) {
+                String docname = (String) iter.next();
+                if (LOG.isDebugEnabled()) {
+                    LOG.debug("possible server name: " + docname);
+                }
+                if (docname.startsWith("XWiki.XWikiServer")) {
+                    retval.add(docname.substring("XWiki.XWikiServer".length()).toLowerCase());
+                }
+            }
+        }
+        return retval;
+    }
+
+    /**
+     * Sets the IndexUpdater that the methods of this class add documents,
+     * attachments and objects to.
+     *
+     * @param indexUpdater the updater to use
+     */
+    public void setIndexUpdater(IndexUpdater indexUpdater) {
+        this.indexUpdater = indexUpdater;
+    }
+}


Property changes on: xwiki/trunk/core/src/main/java/com/xpn/xwiki/plugin/lucene/IndexRebuilder.java
___________________________________________________________________
Name: svn:eol-style
   + native

Added: xwiki/trunk/core/src/main/java/com/xpn/xwiki/plugin/lucene/IndexUpdater.java
===================================================================
--- xwiki/trunk/core/src/main/java/com/xpn/xwiki/plugin/lucene/IndexUpdater.java	2006-12-06 21:50:31 UTC (rev 1689)
+++ xwiki/trunk/core/src/main/java/com/xpn/xwiki/plugin/lucene/IndexUpdater.java	2006-12-06 22:29:02 UTC (rev 1690)
@@ -0,0 +1,431 @@
+/*
+ * 
+ * ===================================================================
+ *
+ * Copyright (c) 2005 Jens Krämer, All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details, published at
+ * http://www.gnu.org/copyleft/gpl.html or in gpl.txt in the
+ * root folder of this distribution.
+ *
+ * Created on 21.01.2005
+ *
+ */
+
+package com.xpn.xwiki.plugin.lucene;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.*;
+
+import org.apache.commons.lang.StringUtils;
+import org.apache.log4j.Logger;
+import org.apache.log4j.MDC;
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.search.Hits;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.store.FSDirectory;
+import org.apache.lucene.document.Field;
+
+import com.xpn.xwiki.XWiki;
+import com.xpn.xwiki.XWikiContext;
+import com.xpn.xwiki.objects.BaseObject;
+import com.xpn.xwiki.doc.XWikiAttachment;
+import com.xpn.xwiki.doc.XWikiDocument;
+import com.xpn.xwiki.notify.XWikiActionNotificationInterface;
+import com.xpn.xwiki.notify.XWikiDocChangeNotificationInterface;
+import com.xpn.xwiki.notify.XWikiNotificationRule;
+
+/**
+ * @author <a href="mailto:jk at jkraemer.net">Jens Krämer </a>
+ */
+public class IndexUpdater implements Runnable, XWikiDocChangeNotificationInterface,
+        XWikiActionNotificationInterface {
+
+    private static final Logger LOG = Logger.getLogger(IndexUpdater.class);
+
+    /**
+     * Milliseconds of sleep between checks for changed documents
+     */
+    private int indexingInterval = 300000;
+    /**
+     * Set by doExit() and polled by the loop in run(). Volatile because the
+     * two are called from different threads.
+     */
+    private volatile boolean exit = false;
+    /**
+     * The currently open index writer, null while none is open. Volatile
+     * because cleanIndex() polls it while run() opens and closes it.
+     */
+    private volatile IndexWriter writer;
+    private String indexDir;
+    private XWikiDocumentQueue queue = new XWikiDocumentQueue();
+    private Analyzer analyzer;
+    private LucenePlugin plugin;
+    private IndexSearcher searcher;
+    private IndexReader reader;
+
+    private XWikiContext context;
+    private XWiki xwiki;
+
+    /**
+     * Names of all fields seen so far while indexing; filled by addToIndex().
+     * NOTE(review): this list is static and accessed without synchronization -
+     * confirm that is acceptable for its readers.
+     */
+    static List fields = new ArrayList();
+    
+
+    /**
+     * Asks the main loop in run() to stop. The flag is only checked at the
+     * top of the loop, so the loop ends after its current pass and sleep.
+     */
+    public void doExit() {
+        exit = true;
+    }
+
+    /**
+     * Main loop. Polls the queue for documents to be indexed.
+     * <p>
+     * Each pass with a non-empty queue drains the queue into the index,
+     * deletes the previously indexed versions of the processed entries and
+     * asks the plugin to reopen its searchers. Every pass then sleeps for
+     * <code>indexingInterval</code> milliseconds. The loop ends once
+     * doExit() has been called.
+     *
+     * @see java.lang.Runnable#run()
+     */
+    public void run() {
+        MDC.put("url", "index updating thread");
+
+        while (!exit) {
+            if (queue.isEmpty()) {
+                if (LOG.isDebugEnabled()) {
+                    LOG.debug("IndexUpdater: queue empty, nothing to do");
+                }
+            } else {
+                if (LOG.isDebugEnabled()) {
+                    LOG.debug("IndexUpdater: documents in queue, start indexing");
+                }
+                // we need a searcher to find old versions of documents
+                openSearcher();
+                openWriter(false);
+                // ids of the index entries that the newly written ones replace
+                List oldDocs = new ArrayList();
+
+                while (!queue.isEmpty()) {
+                    IndexData data = queue.remove();
+
+                    try {
+                        // remember the old entries first, then write the new one
+                        oldDocs.addAll(getOldIndexDocIds(data));
+                        // the document is re-read through this thread's own context
+                        XWikiDocument doc = xwiki.getDocument(data.getFullName(), context);
+                        addToIndex(data, doc, context);
+                    } catch (Exception e) {
+                        // the entry has already been removed from the queue and is not retried
+                        LOG.error("error retrieving doc from own context: " + e.getMessage(), e);
+                        e.printStackTrace();
+                    }
+                }
+                closeWriter();
+                // the following searcher close/open cycle is necessary because
+                // the old reader is not valid for document deletion anymore
+                // after
+                // updating the index
+                closeSearcher();
+                openSearcher();
+                deleteOldDocs(oldDocs);
+                closeSearcher();
+                // readers and searchers should be reopened after index update
+                plugin.openSearchers();
+            }
+            try {
+                Thread.sleep(indexingInterval);
+            } catch (InterruptedException e) {
+                // NOTE(review): the interruption is only printed; the loop
+                // continues and re-checks the exit flag
+                e.printStackTrace();
+            }
+        }
+        xwiki.getStore().cleanUp(context);
+        MDC.remove("url");
+    }
+
+    /**
+     * Closes the searcher and the reader used for finding and deleting old
+     * versions of indexed documents, and clears both fields. The two are
+     * closed independently so that a failure to close the searcher does not
+     * leave the reader open.
+     */
+    private synchronized void closeSearcher() {
+        try {
+            if (searcher != null) searcher.close();
+        } catch (IOException e) {
+            LOG.error("error closing index searcher", e);
+        } finally {
+            searcher = null;
+        }
+        try {
+            if (reader != null) reader.close();
+        } catch (IOException e) {
+            LOG.error("error closing index reader", e);
+        } finally {
+            reader = null;
+        }
+    }
+
+    /**
+     * Opens the index reader and searcher used for finding and deleting old
+     * versions of indexed documents. Any reader/searcher that is still open
+     * is closed first. On failure the error is logged and both fields stay
+     * null.
+     */
+    private synchronized void openSearcher() {
+        // init() opens a searcher and run() opens another one before indexing;
+        // close the previous pair so it is not leaked when the fields are overwritten
+        closeSearcher();
+        try {
+            reader = IndexReader.open(indexDir);
+            searcher = new IndexSearcher(reader);
+        } catch (IOException e) {
+            LOG.error("error opening index searcher", e);
+        }
+    }
+
+    /**
+     * Deletes the documents with the given ids from the index, using the
+     * currently open reader. A failure to delete one document is logged and
+     * the remaining ones are still processed.
+     *
+     * @param oldDocs list of Integer document ids to delete
+     */
+    private void deleteOldDocs(List oldDocs) {
+        if (reader == null) {
+            // openSearcher() failed; without this guard the NullPointerException
+            // would escape run() and end the indexing thread
+            LOG.error("no index reader available, cannot delete " + oldDocs.size() + " old docs");
+            return;
+        }
+        for (Iterator iter = oldDocs.iterator(); iter.hasNext();) {
+            Integer id = (Integer) iter.next();
+            if (LOG.isDebugEnabled()) {
+                LOG.debug("delete doc " + id);
+            }
+            try {
+                reader.deleteDocument(id.intValue());
+            } catch (IOException e1) {
+                LOG.error("error deleting doc " + id, e1);
+            }
+        }
+    }
+
+    /**
+     * @param data
+     * @return
+     */
+    private Collection getOldIndexDocIds(IndexData data) {
+        List retval = new ArrayList(3);
+        Query query = data.buildQuery();
+        try {
+            Hits hits = searcher.search(query);
+            for (int i = 0; i < hits.length(); i++) {
+                retval.add(new Integer(hits.id(i)));
+            }
+        } catch (IOException e) {
+            LOG.error("error looking for old versions of document " + data + " with query " + query, e);
+            e.printStackTrace();
+        }
+        return retval;
+    }
+
+    /**
+     * Opens the index writer on <code>indexDir</code>. Does nothing but log
+     * an error if a writer is already open. An IOException while opening is
+     * logged and leaves <code>writer</code> null.
+     *
+     * @param create passed through to the IndexWriter constructor
+     */
+    private void openWriter(boolean create) {
+        if (writer != null) {
+            LOG.error("Writer already open and createWriter called");
+            return;
+        }
+        try {
+            // fix for windows by Daniel Cortes:
+            FSDirectory f = FSDirectory.getDirectory(indexDir, false);
+            writer = new IndexWriter(f, analyzer, create);
+            //writer = new IndexWriter (indexDir, analyzer, create);
+            writer.setUseCompoundFile(true);
+            if (LOG.isDebugEnabled()) {
+                LOG.debug("successfully opened index writer : " + indexDir);
+            }
+        } catch (IOException e) {
+            LOG.error("IOException when opening Lucene Index for writing at " + indexDir, e);
+        }
+    }
+
+    /**
+     * Optimizes the index and closes the index writer. Does nothing but log
+     * an error if no writer is open. Failures of either step are logged, and
+     * <code>writer</code> is reset to null in any case.
+     */
+    private void closeWriter() {
+        if (writer == null) {
+            LOG.error("Writer not open and closeWriter called");
+            return;
+        }
+        try {
+            writer.optimize();
+        } catch (IOException e1) {
+            LOG.error("Exception caught when optimizing Index", e1);
+        }
+        // close is attempted even when optimizing failed
+        try {
+            writer.close();
+        } catch (Exception e) {
+            LOG.error("Exception caught when closing IndexWriter", e);
+        }
+        writer = null;
+        if (LOG.isDebugEnabled()) {
+            LOG.debug("closed writer.");
+        }
+
+    }
+
+    /**
+     * @param doc
+     * @throws IOException
+     */
+    private void addToIndex(IndexData data, XWikiDocument doc, XWikiContext context) throws IOException {
+        if (LOG.isDebugEnabled()) {
+            LOG.debug("addToIndex: " + data);
+        }
+        org.apache.lucene.document.Document luceneDoc = new org.apache.lucene.document.Document();
+        data.addDataToLuceneDocument(luceneDoc, doc, context);
+        Field fld = null;
+        // collecting all the fields for using up in search
+        for (Enumeration e = luceneDoc.fields(); e.hasMoreElements();) {
+            fld = (Field) e.nextElement();
+            if (!fields.contains(fld.name())) {
+                fields.add(fld.name());
+            }
+        }
+        writer.addDocument(luceneDoc);
+    }
+
+    /**
+     * Sets the directory of the index that is opened for reading and writing.
+     *
+     * @param indexDir The indexDir to set.
+     */
+    public void setIndexDir(String indexDir) {
+        this.indexDir = indexDir;
+    }
+
+    /**
+     * Sets the analyzer that is handed to the index writer when it is opened.
+     *
+     * @param analyzer The analyzer to set.
+     */
+    public void setAnalyzer(Analyzer analyzer) {
+        this.analyzer = analyzer;
+    }
+
+    /**
+     * Prepares the updater: creates its own XWikiContext for the given wiki,
+     * picks the index directory, reads the indexing interval and opens the
+     * searcher.
+     *
+     * @param config configuration holding the index directory list and the
+     *               indexing interval (in seconds, default 300)
+     * @param plugin the plugin whose searchers are reopened after index updates
+     * @param wiki the XWiki instance documents are loaded from
+     */
+    public synchronized void init(Properties config, LucenePlugin plugin, XWiki wiki) {
+        this.xwiki = wiki;
+        this.context = new XWikiContext();
+        this.context.setWiki(xwiki);
+        this.context.setDatabase(xwiki.getDatabase());
+        this.plugin = plugin;
+        // take the first configured index dir as the one for writing
+        String[] indexDirs = StringUtils.split(config.getProperty(LucenePlugin.PROP_INDEX_DIR), " ,");
+        if (indexDirs != null && indexDirs.length > 0) {
+            this.indexDir = indexDirs[0];
+            File f = new File(indexDir);
+            if (!f.isDirectory()) {
+                // directory is missing: create it and an empty index inside it
+                f.mkdirs();
+                cleanIndex();
+            }
+        }
+        // the property is given in seconds, the field holds milliseconds
+        indexingInterval = 1000 * Integer.parseInt(config.getProperty(LucenePlugin.PROP_INDEXING_INTERVAL,
+                "300"));
+        openSearcher();
+    }
+
+    /**
+     * Replaces the index with an empty one by opening the writer in create
+     * mode and closing it again. Waits, polling once per second, until no
+     * writer is open before doing so.
+     */
+    public void cleanIndex() {
+        LOG.info("trying to clear index for rebuilding");
+        while (writer != null) {
+            if (LOG.isDebugEnabled()) {
+                LOG.debug("waiting for existing index writer to close");
+            }
+            try {
+                Thread.sleep(1000);
+            } catch (InterruptedException e) {
+                // NOTE(review): the interruption is only printed; waiting continues
+                e.printStackTrace();
+            }
+        }
+        synchronized (this) {
+            // create == true makes the writer start a new, empty index
+            openWriter(true);
+            closeWriter();
+        }
+    }
+
+    /**
+     * Queues a document for indexing. If the document reports that it has
+     * objects, its objects are queued as well.
+     *
+     * @param document the document to index
+     * @param context the context the document was obtained in
+     */
+    public void add(XWikiDocument document, XWikiContext context) {
+        queue.add(new DocumentData(document, context));
+        if (document.hasElement(XWikiDocument.HAS_OBJECTS)) {
+            addObject(document, context);
+        }
+    }
+
+    /**
+     * Queues the objects of a document for indexing.
+     *
+     * @param document the document whose objects are to be indexed
+     * @param context the context the document was obtained in
+     */
+    public void addObject(XWikiDocument document, XWikiContext context) {
+        queue.add(new ObjectData(document, context));
+    }
+
+    /**
+     * @param attachment
+     */
+    public void add(XWikiDocument document, XWikiAttachment attachment, XWikiContext context) {
+        if (document != null && attachment != null && context != null)
+            queue.add(new AttachmentData(document, attachment, context));
+        else
+            LOG.error("invalid parameters given to add: " + document + ", " + attachment + ", " + context);
+    }
+
+
+    /**
+     * Queues every attachment of a document for indexing.
+     *
+     * @param document the document whose attachments are to be indexed
+     * @param context the context the document was obtained in
+     * @return the number of attachments in the document's attachment list,
+     *         including any whose queuing failed
+     */
+    public int addAttachmentsOfDocument(XWikiDocument document, XWikiContext context) {
+        final List attachments = document.getAttachmentList();
+        final int attachmentCount = attachments.size();
+        Iterator it = attachments.iterator();
+        while (it.hasNext()) {
+            try {
+                add(document, (XWikiAttachment) it.next(), context);
+            } catch (Exception e) {
+                // keep going with the remaining attachments
+                LOG.error("error retrieving attachment of document " + document.getFullName(), e);
+            }
+        }
+        return attachmentCount;
+    }
+
+
+    /**
+     * Notification of changes in document content. Queues the new version of
+     * the document for indexing; any exception raised while doing so is
+     * logged and not propagated to the notifier.
+     *
+     * @param rule the rule that triggered the notification (not used)
+     * @param newDoc the document after the change; this is what gets queued
+     * @param oldDoc the document before the change (only logged)
+     * @param event the event code (only logged)
+     * @param context the context of the change
+     * @see com.xpn.xwiki.notify.XWikiNotificationInterface#notify(com.xpn.xwiki.notify.XWikiNotificationRule,
+     *com.xpn.xwiki.doc.XWikiDocument,com.xpn.xwiki.doc.XWikiDocument,
+     *int,com.xpn.xwiki.XWikiContext)
+     */
+    public void notify(XWikiNotificationRule rule, XWikiDocument newDoc, XWikiDocument oldDoc, int event,
+                       XWikiContext context) {
+        if (LOG.isDebugEnabled()) {
+            LOG.debug("notify from XWikiDocChangeNotificationInterface, event=" + event + ", newDoc="
+                    + newDoc + " oldDoc=" + oldDoc);
+        }
+        try {
+            add(newDoc, context);
+        } catch (Exception e) {
+            LOG.error("error in notify", e);
+        }
+    }
+
+    /**
+     * Notification of attachment uploads.
+     *
+     * @see com.xpn.xwiki.notify.XWikiActionNotificationInterface#notify(com.xpn.xwiki.notify.XWikiNotificationRule,
+     *com.xpn.xwiki.doc.XWikiDocument,java.lang.String,
+     *com.xpn.xwiki.XWikiContext)
+     */
+    public void notify(XWikiNotificationRule arg0, XWikiDocument doc, String action, XWikiContext context) {
+        if ("upload".equals(action)) {
+            if (LOG.isDebugEnabled()) {
+                LOG.debug("upload action notification for doc " + doc.getName());
+            }
+            try {
+                List attachments = doc.getAttachmentList();
+                // find out the most recently changed attachment
+                XWikiAttachment newestAttachment = null;
+                for (Iterator iter = attachments.iterator(); iter.hasNext();) {
+                    XWikiAttachment attachment = (XWikiAttachment) iter.next();
+                    if (newestAttachment != null
+                            && attachment.getDate().before(newestAttachment.getDate()))
+                        newestAttachment = attachment;
+                    else
+                        newestAttachment = attachment;
+                }
+                add(doc, newestAttachment, context);
+            } catch (Exception e) {
+                LOG.error("error in notify", e);
+            }
+        }
+    }
+
+    /**
+     * @return the size reported by the queue of items waiting to be indexed
+     */
+    public long getQueueSize() {
+        return queue.getSize();
+    }
+}


Property changes on: xwiki/trunk/core/src/main/java/com/xpn/xwiki/plugin/lucene/IndexUpdater.java
___________________________________________________________________
Name: svn:eol-style
   + native

Added: xwiki/trunk/core/src/main/java/com/xpn/xwiki/plugin/lucene/LucenePlugin.java
===================================================================
--- xwiki/trunk/core/src/main/java/com/xpn/xwiki/plugin/lucene/LucenePlugin.java	2006-12-06 21:50:31 UTC (rev 1689)
+++ xwiki/trunk/core/src/main/java/com/xpn/xwiki/plugin/lucene/LucenePlugin.java	2006-12-06 22:29:02 UTC (rev 1690)
@@ -0,0 +1,313 @@
+/*
+ * 
+ * ===================================================================
+ *
+ * Copyright (c) 2005 Jens Krämer, All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details, published at
+ * http://www.gnu.org/copyleft/gpl.html or in gpl.txt in the
+ * root folder of this distribution.
+ *
+ * Created on 21.01.2005
+ *
+ */
+package com.xpn.xwiki.plugin.lucene;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Properties;
+
+import org.apache.commons.lang.StringUtils;
+import org.apache.log4j.Logger;
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.standard.StandardAnalyzer;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.queryParser.ParseException;
+import org.apache.lucene.queryParser.QueryParser;
+import org.apache.lucene.queryParser.MultiFieldQueryParser;
+import org.apache.lucene.search.*;
+
+import com.xpn.xwiki.XWikiContext;
+import com.xpn.xwiki.doc.XWikiDocument;
+import com.xpn.xwiki.doc.XWikiAttachment;
+import com.xpn.xwiki.api.Api;
+import com.xpn.xwiki.api.XWiki;
+import com.xpn.xwiki.notify.DocChangeRule;
+import com.xpn.xwiki.notify.XWikiActionRule;
+import com.xpn.xwiki.plugin.XWikiDefaultPlugin;
+import com.xpn.xwiki.plugin.XWikiPluginInterface;
+
+public class LucenePlugin extends XWikiDefaultPlugin implements XWikiPluginInterface {
+    public static final String DOCTYPE_WIKIPAGE = "wikipage";
+    public static final String DOCTYPE_ATTACHMENT = "attachment";
+
+    private static final Logger LOG = Logger.getLogger(LucenePlugin.class);
+    private Analyzer analyzer;
+    private IndexUpdater indexUpdater;
+    private Thread indexUpdaterThread;
+    protected Properties config;
+
+    public static final String PROP_INDEX_DIR = "xwiki.plugins.lucene.indexdir";
+    public static final String PROP_ANALYZER = "xwiki.plugins.lucene.analyzer";
+    public static final String PROP_INDEXING_INTERVAL = "xwiki.plugins.lucene.indexinterval";
+
+    private static final String DEFAULT_ANALYZER = "org.apache.lucene.analysis.de.GermanAnalyzer";
+    private Searcher[] searchers;
+    private String indexDirs;
+    private IndexRebuilder indexRebuilder;
+
+    /**
+     * Creates the plugin and immediately initializes it through
+     * {@link #init(XWikiContext)}.
+     *
+     * @param name passed to the superclass constructor
+     * @param className passed to the superclass constructor
+     * @param context context used for the superclass constructor and for init
+     */
+    public LucenePlugin(String name, String className, XWikiContext context) {
+        super(name, className, context);
+        init(context);
+    }
+
+    /**
+     * Asks the index updater, if there is one, to leave its main loop before
+     * this plugin instance is discarded.
+     *
+     * @see java.lang.Object#finalize()
+     */
+    protected void finalize() throws Throwable {
+        if (indexUpdater != null) indexUpdater.doExit();
+        super.finalize();
+    }
+
+    /**
+     * Delegates a rebuild of the index to the IndexRebuilder.
+     *
+     * @param wiki the wiki API object passed on to the rebuilder
+     * @param context the context passed on to the rebuilder
+     * @return whatever the rebuilder's rebuildIndex returns
+     */
+    public synchronized int rebuildIndex(com.xpn.xwiki.api.XWiki wiki, XWikiContext context) {
+        return indexRebuilder.rebuildIndex(wiki, context);
+    }
+
+    /**
+     * Allows to search special named lucene indexes without having to configure
+     * them in xwiki.cfg. Slower than {@link #getSearchResults} since
+     * new index searcher instances are created for every query. The
+     * searchers are closed again before this method returns, also when the
+     * search fails.
+     *
+     * @param query       query string
+     * @param myIndexDirs comma separated list of directories containing the lucene
+     *                    indexes to search.
+     * @param languages   comma separated list of language codes to search in, may be
+     *                    null to search all languages
+     * @param wiki        wiki API object handed to the search results
+     * @return the results of the search
+     * @throws Exception if creating the searchers, parsing the query,
+     *                   searching or closing the searchers fails
+     */
+    public SearchResults getSearchResultsFromIndexes(String query, String myIndexDirs, String languages,
+                                                     XWiki wiki) throws Exception {
+        Searcher[] mySearchers = createSearchers(myIndexDirs);
+        try {
+            // NOTE(review): the returned SearchResults wraps the Hits of these
+            // searchers - confirm it no longer needs them once they are closed.
+            return search(query, null, languages, mySearchers, wiki);
+        } finally {
+            // release the per-query searchers even if the search threw
+            closeSearchers(mySearchers);
+        }
+    }
+
+    /**
+     * Searches all Indexes configured in xwiki.cfg (property
+     * <code>xwiki.plugins.lucene.indexdir</code>), using the searchers this
+     * plugin keeps open.
+     *
+     * @param query            query String entered into a search form
+     * @param virtualWikiNames Name of the virtual Wiki to search, global search when null
+     * @param languages        comma separated list of language codes to search in, may be
+     *                         null to search all languages
+     * @param wiki             XWiki
+     * @return the results of the search as a SearchResults object
+     * @throws Exception in case of error(s)
+     */
+    public SearchResults getSearchResults(String query, String virtualWikiNames, String languages, XWiki wiki)
+            throws Exception {
+        return search(query, virtualWikiNames, languages, this.searchers, wiki);
+    }
+
+    /**
+     * Runs a query against the given searchers, combined into one
+     * MultiSearcher.
+     *
+     * @param query            the query string
+     * @param virtualWikiNames comma separated list of virtual wiki names to search in, may
+     *                         be null to search all virtual wikis
+     * @param languages        comma separated list of language codes to search in, may be
+     *                         null to search all languages
+     * @param indexes          the searchers to query
+     * @param wiki             wiki API object handed to the search results
+     * @return the hits wrapped in a SearchResults object
+     * @throws IOException if searching fails
+     * @throws ParseException if the query string cannot be parsed
+     */
+    private SearchResults search(String query, String virtualWikiNames, String languages,
+                                 Searcher[] indexes, XWiki wiki) throws IOException, ParseException {
+        MultiSearcher searcher = new MultiSearcher(indexes);
+        Query q = buildQuery(query, virtualWikiNames, languages);
+        Hits hits = searcher.search(q);
+        final int hitcount = hits.length();
+        if (LOG.isDebugEnabled()) LOG.debug("query " + q + " returned " + hitcount + " hits");
+        return new SearchResults(hits, wiki);
+    }
+
+    /**
+     * Builds the Lucene query for a user query string.
+     * <p>
+     * A query string containing a colon is treated as
+     * <code>property:text</code> and searched in that one field only. Any
+     * other query string is searched in all fields known to the IndexUpdater
+     * and then restricted to the given wikis and languages.
+     *
+     * @param query            the user's query string
+     * @param virtualWikiNames comma separated list of virtual wiki names, may be null
+     *                         or empty to search all wikis
+     * @param languages        comma separated list of language codes to search in, may be
+     *                         null to search all languages
+     * @return the query to execute
+     * @throws ParseException if the query string cannot be parsed
+     */
+    private Query buildQuery(String query, String virtualWikiNames, String languages) throws ParseException {
+        // build a query like this: <user query string> AND <wikiNamesQuery> AND
+        // <languageQuery>
+        BooleanQuery bQuery = new BooleanQuery();
+        Query parsedQuery = null;
+
+        // for object search
+        // NOTE(review): this branch returns without applying the wiki and
+        // language restrictions - confirm that this is intended.
+        if (query.contains(":")) {
+            String property = query.substring(0, query.indexOf(":"));
+            query = query.substring(query.indexOf(":") + 1, query.length());
+            QueryParser qp = new QueryParser(property, analyzer);
+            parsedQuery = qp.parse(query);
+            bQuery.add(parsedQuery, BooleanClause.Occur.MUST);
+            return bQuery;
+        }
+
+        //for fulltext search
+        List fieldList = IndexUpdater.fields;
+        String[] fields = (String[]) fieldList.toArray(new String[fieldList.size()]);
+        BooleanClause.Occur[] flags = new BooleanClause.Occur[fields.length];
+        for (int i = 0; i < flags.length; i++) {
+            // a hit in any one of the fields is sufficient
+            flags[i] = BooleanClause.Occur.SHOULD;
+        }
+        parsedQuery = MultiFieldQueryParser.parse(query, fields, flags, analyzer);
+        bQuery.add(parsedQuery, BooleanClause.Occur.MUST);
+
+        // The restrictions have to be MUST clauses: next to a MUST clause a
+        // SHOULD clause is optional and would only influence the score.
+        if (virtualWikiNames != null && virtualWikiNames.length() > 0) {
+            bQuery.add(buildOredTermQuery(virtualWikiNames, IndexFields.DOCUMENT_WIKI), BooleanClause.Occur.MUST);
+        }
+        if (languages != null && languages.length() > 0) {
+            bQuery.add(buildOredTermQuery(languages, IndexFields.DOCUMENT_LANGUAGE), BooleanClause.Occur.MUST);
+        }
+        return bQuery;
+    }
+
+    /**
+     * Builds a query matching documents that have one of the given values in
+     * the given field. Surrounding whitespace of each value is ignored.
+     *
+     * @param values comma separated list of values to look for
+     * @param fieldname name of the field the values are looked up in
+     * @return A query returning documents matching one of the given values in
+     *         the given field
+     */
+    private Query buildOredTermQuery(final String values, final String fieldname) {
+        String[] valueArray = values.split("\\,");
+        if (valueArray.length > 1) {
+            // build a query like this: <valueArray[0]> OR <valueArray[1]> OR ...
+            BooleanQuery orQuery = new BooleanQuery();
+            for (int i = 0; i < valueArray.length; i++) {
+                orQuery.add(new TermQuery(new Term(fieldname, valueArray[i].trim())), BooleanClause.Occur.SHOULD);
+            }
+            return orQuery;
+        }
+        // at most one value, no OR'ed Terms necessary. split() returns an empty
+        // array for input made up of commas only; fall back to the raw input
+        // then instead of failing on valueArray[0].
+        String single = valueArray.length == 0 ? values : valueArray[0];
+        // trimmed like the values of the multi-value case above
+        return new TermQuery(new Term(fieldname, single.trim()));
+    }
+
+    /**
+     * Initializes the plugin: instantiates the configured analyzer (falling
+     * back to the default analyzer class), opens the searchers, starts the
+     * index updater thread, creates the index rebuilder and registers the
+     * updater for document change and action notifications.
+     *
+     * @param context context giving access to the wiki and its configuration
+     * @throws RuntimeException if the default analyzer cannot be instantiated
+     *                          either, or if the searchers cannot be opened
+     */
+    public synchronized void init(XWikiContext context) {
+        super.init(context);
+        if (LOG.isDebugEnabled()) LOG.debug("lucene plugin: in init");
+        config = context.getWiki().getConfig();
+        try {
+            analyzer = (Analyzer) Class.forName(config.getProperty(PROP_ANALYZER, DEFAULT_ANALYZER))
+                    .newInstance();
+        } catch (Exception e) {
+            e.printStackTrace();
+            LOG.error("error instantiating analyzer : ", e);
+            LOG.warn("using default analyzer class: " + DEFAULT_ANALYZER);
+            // second attempt with the built-in default class name
+            try {
+                analyzer = (Analyzer) Class.forName(DEFAULT_ANALYZER).newInstance();
+            } catch (Exception e1) {
+                e1.printStackTrace();
+                throw new RuntimeException("instantiation of default analyzer " + DEFAULT_ANALYZER
+                        + " failed", e1);
+            }
+        }
+        this.indexDirs = config.getProperty(PROP_INDEX_DIR);
+        openSearchers();
+        indexUpdater = new IndexUpdater();
+        indexUpdater.setAnalyzer(analyzer);
+        indexUpdater.init(config, this, context.getWiki());
+        // the updater polls its queue in a thread of its own
+        indexUpdaterThread = new Thread(indexUpdater);
+        indexUpdaterThread.start();
+        indexRebuilder = new IndexRebuilder();
+        indexRebuilder.setIndexUpdater(indexUpdater);
+        // have the updater notified of document changes and of actions
+        context.getWiki().getNotificationManager().addGeneralRule(new DocChangeRule(indexUpdater));
+        context.getWiki().getNotificationManager().addGeneralRule(new XWikiActionRule(indexUpdater));
+        LOG.info("lucene plugin initialized.");
+    }
+
+    /**
+     * @return the fixed plugin name "lucene"
+     */
+    public String getName() {
+        return "lucene";
+    }
+
+    /**
+     * Creates the API facade for the given plugin.
+     *
+     * @param plugin the plugin to wrap; cast to LucenePlugin
+     * @param context context handed to the API object
+     * @return a new LucenePluginApi
+     */
+    public Api getPluginApi(XWikiPluginInterface plugin, XWikiContext context) {
+        return new LucenePluginApi((LucenePlugin) plugin, context);
+    }
+
+    /**
+     * Creates an array of Searchers for a number of lucene indexes. An index
+     * that cannot be opened is logged and left out, so the result may hold
+     * fewer searchers than directories were given.
+     *
+     * @param indexDirs List of Lucene index directories to create searchers
+     *                  for, separated by commas and/or spaces.
+     * @return Array of searchers
+     * @throws Exception declared for callers; IOExceptions from opening an
+     *                   index are caught here
+     */
+    public static Searcher[] createSearchers(String indexDirs) throws Exception {
+        String[] dirs = StringUtils.split(indexDirs, " ,");
+        List searchersList = new ArrayList();
+        for (int i = 0; i < dirs.length; i++) {
+            try {
+                IndexReader reader = IndexReader.open(dirs[i]);
+                searchersList.add(new IndexSearcher(reader));
+            } catch (IOException e) {
+                // skip this index, the others are still searched
+                LOG.error("cannot open index " + dirs[i], e);
+                e.printStackTrace();
+            }
+        }
+        return (Searcher[]) searchersList.toArray(new Searcher[searchersList.size()]);
+    }
+
+    /**
+     * Opens the searchers for the configured index Dirs after closing any
+     * already existing ones.
+     *
+     * @throws RuntimeException wrapping any exception raised while closing
+     *                          or creating the searchers
+     */
+    protected synchronized void openSearchers() {
+        try {
+            closeSearchers(this.searchers);
+            this.searchers = createSearchers(indexDirs);
+        } catch (Exception e1) {
+            LOG.error("error opening searchers for index dirs " + config.getProperty(PROP_INDEX_DIR), e1);
+            throw new RuntimeException("error opening searchers for index dirs "
+                    + config.getProperty(PROP_INDEX_DIR), e1);
+        }
+    }
+
+    /**
+     * @throws IOException
+     */
+    protected static void closeSearchers(Searcher[] searchers) throws IOException {
+        if (searchers != null) {
+            for (int i = 0; i < searchers.length; i++) {
+                if (searchers[i] != null) searchers[i].close();
+            }
+        }
+    }
+
+    /**
+     * @return the queue size reported by the index updater
+     */
+    public long getQueueSize() {
+        return indexUpdater.getQueueSize();
+    }
+
+    /**
+     * Hands a document to the index updater for indexing.
+     *
+     * @param doc the document to index
+     * @param context the context the document was obtained in
+     */
+    public void queueDocument(XWikiDocument doc, XWikiContext context) {
+        indexUpdater.add(doc, context);
+    }
+
+    /**
+     * Hands a single attachment to the index updater for indexing.
+     *
+     * @param doc the document the attachment belongs to
+     * @param attach the attachment to index
+     * @param context the context the document was obtained in
+     */
+    public void queueAttachment(XWikiDocument doc, XWikiAttachment attach, XWikiContext context) {
+        indexUpdater.add(doc, attach, context);
+    }
+
+    /**
+     * Hands all attachments of a document to the index updater for indexing.
+     *
+     * @param doc the document whose attachments are to be indexed
+     * @param context the context the document was obtained in
+     */
+    public void queueAttachment(XWikiDocument doc, XWikiContext context) {
+        indexUpdater.addAttachmentsOfDocument(doc, context);
+    }
+
+}
\ No newline at end of file


Property changes on: xwiki/trunk/core/src/main/java/com/xpn/xwiki/plugin/lucene/LucenePlugin.java
___________________________________________________________________
Name: svn:eol-style
   + native

Added: xwiki/trunk/core/src/main/java/com/xpn/xwiki/plugin/lucene/LucenePluginApi.java
===================================================================
--- xwiki/trunk/core/src/main/java/com/xpn/xwiki/plugin/lucene/LucenePluginApi.java	2006-12-06 21:50:31 UTC (rev 1689)
+++ xwiki/trunk/core/src/main/java/com/xpn/xwiki/plugin/lucene/LucenePluginApi.java	2006-12-06 22:29:02 UTC (rev 1690)
@@ -0,0 +1,197 @@
+/*
+ * 
+ * ===================================================================
+ *
+ * Copyright (c) 2005 Jens Krämer, All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details, published at
+ * http://www.gnu.org/copyleft/gpl.html or in gpl.txt in the
+ * root folder of this distribution.
+ *
+ * Created on 21.01.2005
+ *
+ */
+package com.xpn.xwiki.plugin.lucene;
+
+import org.apache.log4j.Logger;
+
+import com.xpn.xwiki.XWikiContext;
+import com.xpn.xwiki.api.Api;
+import com.xpn.xwiki.api.Context;
+
+/**
+ * This is the main interface for using the Plugin. It basically acts as a
+ * facade to the {@link LucenePlugin}class.
+ * <p>
+ * The methods intended for use in wiki pages are
+ * </p>
+ * <ul>
+ * <li>{@link #rebuildIndex(com.xpn.xwiki.api.XWiki, Context)}for rebuilding
+ * the whole index</li>
+ * <li>{@link #getSearchResults(String, String, com.xpn.xwiki.api.XWiki)}for
+ * searching the index</li>
+ * <li>
+ * {@link #getSearchResults(String, String, String, com.xpn.xwiki.api.XWiki)}
+ * for searching specific virtual wikis</li>
+ * <li>and
+ * {@link #getSearchResultsFromIndexes(String, String, String, com.xpn.xwiki.api.XWiki)}
+ * for searching other lucene indexes than thos configured in
+ * <code>xwiki.cfg</code></li>
+ * </ul>
+ * @author <a href="mailto:jk at jkraemer.net">Jens Krämer </a>
+ */
+public class LucenePluginApi extends Api
+{
+    private LucenePlugin        plugin;
+    private static final Logger LOG = Logger.getLogger (LucenePluginApi.class);
+
+    public LucenePluginApi (LucenePlugin plugin, XWikiContext context)
+    {
+        super (context);
+        setPlugin (plugin);
+    }
+
+    /**
+     * Starts a rebuild of the whole index.
+     * @param wiki
+     * @param context
+     * @return Number of documents scheduled for indexing. -1 in case of errors
+     */
+    public int rebuildIndex (com.xpn.xwiki.api.XWiki wiki, Context context)
+    {
+        if (wiki.hasAdminRights ())
+        {
+            return getPlugin().rebuildIndex (wiki, context.getContext());
+        }
+        LOG.info ("access denied to rebuildIndex: insufficient rights");
+        return -1;
+    }
+
+    /**
+     * Searches the named indexes using the given query for documents in the
+     * given languages
+     * @param query
+     *            the query entered by the user
+     * @param indexDirs
+     *            comma separated list of lucene index directories to search in
+     * @param languages
+     *            comma separated list of language codes to search in, may be
+     *            null to search all languages
+     * @param wiki
+     *            reference to xwiki
+     * @return {@link SearchResults}instance containing the results.
+     */
+    public SearchResults getSearchResultsFromIndexes (String query, String indexDirs, String languages,
+                                                      com.xpn.xwiki.api.XWiki wiki)
+    {
+        try
+        {
+            return getPlugin ().getSearchResults (query, indexDirs, languages, wiki);
+        } catch (Exception e)
+        {
+            e.printStackTrace ();
+        } // end of try-catch
+        return null;
+    }
+
+    /**
+     * Searches the configured Indexes using the specified lucene query for
+     * documents in the given languages.
+     * <p>
+     * With virtual wikis enabled in your xwiki installation this will deliver
+     * results from all virtuall wikis. For searching in a subset of your
+     * virtual wikis see
+     * {@link #getSearchResults(String, String, String, com.xpn.xwiki.api.XWiki)}
+     * </p>
+     * @param query
+     *            query entered by the user
+     * @param languages
+     *            comma separated list of language codes to search in, may be
+     *            null to search all languages. Language codes can be:
+     *            <ul>
+     *            <li><code>default</code> for content having no specific
+     *            language information</li>
+     *            <li>lower case 2-letter language codes like <code>en</code>,
+     *            <code>de</code> as used by xwiki</li>
+     *            </ul>
+     * @return a {@link SearchResults}instance containing the results.
+     */
+    public SearchResults getSearchResults (String query, String languages, com.xpn.xwiki.api.XWiki wiki)
+    {
+        return getSearchResultsFromIndexes (query, null, languages, wiki);
+    }
+
+    /**
+     * Searches the configured Indexes using the specified lucene query for
+     * documents in the given languages belonging to one of the given virtual
+     * wikis.
+     * <p>
+     * Using this method only makes sense with virtual wikis enabled. Otherwise
+     * use {@link #getSearchResults(String, String, com.xpn.xwiki.api.XWiki)}
+     * instead.
+     * </p>
+     * @param query
+     *            query entered by the user
+     * @param virtualWikiNames
+     *            Names of the virtual wikis to search in. May be null for
+     *            global search.
+     * @param languages
+     *            comma separated list of language codes to search in, may be
+     *            null to search all languages. Language codes can be:
+     *            <ul>
+     *            <li><code>default</code> for content having no specific
+     *            language information</li>
+     *            <li>lower case 2-letter language codes like <code>en</code>,
+     *            <code>de</code> as used by xwiki</li>
+     *            </ul>
+     * @return a {@link SearchResults}instance containing the results.
+     */
+    public SearchResults getSearchResults (String query, String virtualWikiNames, String languages,
+                                           com.xpn.xwiki.api.XWiki wiki)
+    {
+        try
+        {
+            SearchResults retval = getPlugin ().getSearchResults (query, virtualWikiNames, languages, wiki);
+            if (LOG.isDebugEnabled ()) LOG.debug ("returning " + retval.getHitcount () + " results");
+            return retval;
+        } catch (Exception e)
+        {
+            e.printStackTrace ();
+        }
+        return null;
+    }
+
+    /*
+    @return the number of documents in the queue
+     */
+    public long getQueueSize() {
+        return plugin.getQueueSize();
+    }
+
+
+    /**
+     * @param plugin
+     *            plugin instance we are the facade for.
+     */
+    public void setPlugin (LucenePlugin plugin)
+    {
+        this.plugin = plugin;
+    }
+
+    /**
+     * @return the plugin instance we are the facade for.
+     */
+    public LucenePlugin getPlugin ()
+    {
+        return this.plugin;
+    }
+
+}
\ No newline at end of file


Property changes on: xwiki/trunk/core/src/main/java/com/xpn/xwiki/plugin/lucene/LucenePluginApi.java
___________________________________________________________________
Name: svn:eol-style
   + native

Added: xwiki/trunk/core/src/main/java/com/xpn/xwiki/plugin/lucene/ObjectData.java
===================================================================
--- xwiki/trunk/core/src/main/java/com/xpn/xwiki/plugin/lucene/ObjectData.java	2006-12-06 21:50:31 UTC (rev 1689)
+++ xwiki/trunk/core/src/main/java/com/xpn/xwiki/plugin/lucene/ObjectData.java	2006-12-06 22:29:02 UTC (rev 1690)
@@ -0,0 +1,209 @@
+/*
+ *
+ * ===================================================================
+ *
+ * Copyright (c) 2005 Jens Krämer, All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details, published at
+ * http://www.gnu.org/copyleft/gpl.html or in gpl.txt in the
+ * root folder of this distribution.
+ *
+ * Created on 25.01.2005
+ *
+ * @author Lokesh (N.Lokeswara Reddy) Congruent Solutions.Pvt.Ltd.
+ */
+package com.xpn.xwiki.plugin.lucene;
+
+import org.apache.log4j.Logger;
+import org.apache.lucene.document.Field;
+import com.xpn.xwiki.doc.XWikiDocument;
+import com.xpn.xwiki.XWikiContext;
+import com.xpn.xwiki.objects.BaseProperty;
+import com.xpn.xwiki.objects.BaseObject;
+import com.xpn.xwiki.objects.PropertyInterface;
+import com.xpn.xwiki.objects.classes.BaseClass;
+import com.xpn.xwiki.objects.classes.StaticListClass;
+import com.xpn.xwiki.objects.classes.ListClass;
+import com.xpn.xwiki.objects.classes.ListItem;
+
+import java.util.List;
+import java.util.Map;
+import java.util.Iterator;
+
+/**
+ * Index data built from the XWiki objects attached to a document. The values
+ * of all properties of all objects are appended to the full text, and each
+ * property is additionally indexed as a field named
+ * <code>ClassName.propertyName</code>.
+ */
+public class ObjectData extends IndexData {
+
+    private static final Logger LOG = Logger.getLogger(ObjectData.class);
+
+    /**
+     * Copies author, creator, modification date and creation date from the
+     * given document.
+     * @param doc
+     *            document whose objects are to be indexed
+     * @param context
+     *            context handed to the {@link IndexData} super class
+     */
+    public ObjectData(final XWikiDocument doc, final XWikiContext context) {
+        super(doc, context);
+        setAuthor(doc.getAuthor());
+        setCreator(doc.getCreator());
+        setModificationDate(doc.getDate());
+        setCreationDate(doc.getCreationDate());
+    }
+
+
+    /**
+     * @see IndexData#getType()
+     */
+    public String getType() {
+        return LucenePlugin.DOCTYPE_WIKIPAGE;
+    }
+
+    /**
+     * @return a string containing the result of
+     *         {@link IndexData#getFullText(XWikiDocument,XWikiContext)} plus
+     *         the property values of the document's XWiki objects, as far as
+     *         they could be extracted.
+     */
+    public String getFullText(XWikiDocument doc, XWikiContext context) {
+        StringBuffer retval = new StringBuffer(super.getFullText(doc, context));
+        String contentText = getContentAsText(doc, context);
+        if (contentText != null) {
+            retval.append(" ").append(contentText);
+        }
+        return retval.toString();
+    }
+
+    /**
+     * Concatenates the property values of all objects of the document, each
+     * value followed by a blank.
+     * @param doc
+     *            document whose objects are read
+     * @param context
+     *            passed through to {@link #extractContent}
+     * @return the collected text; whatever was collected so far if an error
+     *         occurs (the error is logged)
+     */
+    private String getContentAsText(XWikiDocument doc, XWikiContext context) {
+        StringBuffer contentText = new StringBuffer();
+        try {
+            if (LOG.isDebugEnabled()) {
+                LOG.debug("extracting object content of document " + doc.getFullName());
+            }
+            Map objects = doc.getxWikiObjects();
+            Iterator itKey = objects.keySet().iterator();
+            while (itKey.hasNext()) {
+                String className = (String) itKey.next();
+                Iterator itObj = doc.getObjects(className).iterator();
+                while (itObj.hasNext()) {
+                    extractContent(contentText, (BaseObject) itObj.next(), context);
+                }
+            }
+        } catch (Exception e) {
+            LOG.error("error getting content from  XWiki Objects ", e);
+        }
+        return contentText.toString();
+    }
+
+    /**
+     * Appends the value of every property of the given object to the buffer,
+     * each followed by a blank. Properties without a value contribute only
+     * the blank.
+     * @param contentText
+     *            buffer to append to
+     * @param baseObject
+     *            object to read the properties from; null is ignored
+     * @param context
+     *            not used by this method
+     */
+    private void extractContent(StringBuffer contentText, BaseObject baseObject, XWikiContext context) {
+        if (baseObject == null) {
+            return;
+        }
+        try {
+            Object[] propertyNames = baseObject.getPropertyNames();
+            for (int i = 0; i < propertyNames.length; i++) {
+                // The helper handles its own failures, so one unreadable
+                // property does not prevent the remaining ones from being
+                // extracted.
+                contentText.append(getContentAsText(baseObject, (String) propertyNames[i]));
+                contentText.append(" ");
+            }
+        } catch (Exception e) {
+            LOG.error("error getting content from  XWiki Object ", e);
+        }
+    }
+
+    /**
+     * Adds the data of the super class and then one or more fields per
+     * property of every object attached to the document. A failure while
+     * indexing a single property is logged and does not stop the indexing of
+     * the other properties.
+     * @param luceneDoc
+     *            lucene document to add the fields to
+     * @param doc
+     *            document whose objects are indexed
+     * @param context
+     *            context used to resolve the classes of the objects
+     */
+    public void addDataToLuceneDocument(org.apache.lucene.document.Document luceneDoc, XWikiDocument doc,
+                                        XWikiContext context) {
+
+        super.addDataToLuceneDocument(luceneDoc, doc, context);
+        Map objects = doc.getxWikiObjects();
+        for (Iterator itr = objects.keySet().iterator(); itr.hasNext();) {
+            String className = (String) itr.next();
+            Iterator itObj = doc.getObjects(className).iterator();
+
+            while (itObj.hasNext()) {
+                BaseObject baseObject = (BaseObject) itObj.next();
+                if (baseObject == null) {
+                    // Same guard as in extractContent: skip empty entries
+                    // instead of failing the whole document.
+                    continue;
+                }
+                Object[] propertyNames = baseObject.getPropertyNames();
+                for (int i = 0; i < propertyNames.length; i++) {
+                    try {
+                        indexProperty(luceneDoc, baseObject, (String) propertyNames[i], context);
+                    } catch (Exception e) {
+                        LOG.error("error extracting fulltext for document " + this, e);
+                    }
+                }
+            }
+        }
+    }
+
+    /**
+     * Indexes one property of an object. Multi select static lists are
+     * handled by {@link #indexStaticList}; every other property is indexed
+     * with its textual value in the field <code>ClassName.propertyName</code>.
+     */
+    private void indexProperty(org.apache.lucene.document.Document luceneDoc, BaseObject baseObject, String propertyName, XWikiContext context) {
+        String fieldFullName = baseObject.getClassName() + "." + propertyName;
+        BaseClass bClass = baseObject.getxWikiClass(context);
+        PropertyInterface prop = bClass.getField(propertyName);
+
+        if (prop instanceof StaticListClass && ((StaticListClass)prop).isMultiSelect()) {
+            indexStaticList(luceneDoc, baseObject, (StaticListClass) prop, propertyName, context);
+        } else {
+            // getContentAsText never returns null (empty string at worst).
+            final String ft = getContentAsText(baseObject, propertyName);
+            luceneDoc.add(new Field(fieldFullName, ft, Field.Store.YES, Field.Index.TOKENIZED));
+        }
+    }
+
+    /**
+     * Indexes the selected entries of a multi select static list property.
+     * For every selected entry known to the list definition the fields
+     * <code>ClassName.propertyName.key</code> and
+     * <code>ClassName.propertyName.value</code> are added; the field
+     * <code>ClassName.propertyName</code> receives the stored entry and, if it
+     * differs from the id, also the value of the list item.
+     */
+    private void indexStaticList(org.apache.lucene.document.Document luceneDoc, BaseObject baseObject, StaticListClass prop, String propertyName, XWikiContext context) {
+        Map possibleValues = prop.getMap(context);
+        List keys = baseObject.getListValue(propertyName);
+        String fieldFullName = baseObject.getClassName() + "." + propertyName;
+        Iterator it = keys.iterator();
+        while (it.hasNext()) {
+            String value = (String) it.next();
+            ListItem item = (ListItem) possibleValues.get(value);
+            if (item != null) {
+                // we index the key of the list
+                String fieldName = fieldFullName + ".key";
+                luceneDoc.add(new Field(fieldName, item.getId(), Field.Store.YES, Field.Index.TOKENIZED));
+                // we index the value
+                fieldName = fieldFullName + ".value";
+                luceneDoc.add(new Field(fieldName, item.getValue(), Field.Store.YES, Field.Index.TOKENIZED));
+                // the main field gets the value too if it is not equal to the id (key)
+                if (!item.getId().equals(item.getValue())) {
+                    luceneDoc.add(new Field(fieldFullName, item.getValue(), Field.Store.YES, Field.Index.TOKENIZED));
+                }
+            }
+            // the stored entry itself is always indexed in the main field
+            luceneDoc.add(new Field(fieldFullName, value, Field.Store.YES, Field.Index.TOKENIZED));
+        }
+    }
+
+    /**
+     * @param doc
+     *            not used by this method
+     * @param baseObject
+     *            object to read the property from
+     * @param property
+     *            name of the property
+     * @param context
+     *            not used by this method
+     * @return the textual value of the property, empty if it has no value
+     */
+    public String getFullText(XWikiDocument doc, BaseObject baseObject, String property, XWikiContext context) {
+        return getContentAsText(baseObject, property);
+    }
+
+    /**
+     * @param baseObject
+     *            object to read the property from
+     * @param property
+     *            name of the property
+     * @return the <code>toString()</code> of the property value; an empty
+     *         string if the property or its value is missing or if reading
+     *         it fails (the failure is logged). Never null.
+     */
+    private String getContentAsText(BaseObject baseObject, String property) {
+
+        StringBuffer contentText = new StringBuffer();
+        try {
+            BaseProperty baseProperty = (BaseProperty) baseObject.getField(property);
+            // A missing property is an expected case, not an error.
+            if ((baseProperty != null) && (baseProperty.getValue() != null)) {
+                contentText.append(baseProperty.getValue().toString());
+            }
+        } catch (Exception e) {
+            LOG.error("error getting content from  XWiki Objects ", e);
+        }
+        return contentText.toString();
+    }
+
+
+}


Property changes on: xwiki/trunk/core/src/main/java/com/xpn/xwiki/plugin/lucene/ObjectData.java
___________________________________________________________________
Name: svn:eol-style
   + native

Added: xwiki/trunk/core/src/main/java/com/xpn/xwiki/plugin/lucene/SearchResult.java
===================================================================
--- xwiki/trunk/core/src/main/java/com/xpn/xwiki/plugin/lucene/SearchResult.java	2006-12-06 21:50:31 UTC (rev 1689)
+++ xwiki/trunk/core/src/main/java/com/xpn/xwiki/plugin/lucene/SearchResult.java	2006-12-06 22:29:02 UTC (rev 1690)
@@ -0,0 +1,207 @@
+/*
+ * 
+ * ===================================================================
+ *
+ * Copyright (c) 2005 Jens Krämer, All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details, published at
+ * http://www.gnu.org/copyleft/gpl.html or in gpl.txt in the
+ * root folder of this distribution.
+ *
+ * Created on 25.01.2005
+ *
+ */
+
+package com.xpn.xwiki.plugin.lucene;
+import java.util.Date;
+
+import org.apache.log4j.Logger;
+
+import com.xpn.xwiki.XWikiContext;
+import com.xpn.xwiki.XWikiException;
+import com.xpn.xwiki.api.Context;
+import com.xpn.xwiki.api.Document;
+import com.xpn.xwiki.web.XWikiURLFactory;
+
+/**
+ * Result of a search. The Plugin will return a collection of these for display
+ * on the search page.
+ * @author <a href="mailto:jk at jkraemer.net">Jens Krämer </a>
+ */
+public class SearchResult
+{
+    private float               score;
+    private String              name;
+    private String              wiki;
+    private String              web;
+    private String              url;
+    private String              filename;
+    private String              type;
+    private String              author;
+    private String              language;
+    private Date                date;
+    private Date                creationDate;
+    private String              creator;
+    private static final Logger LOG = Logger.getLogger (SearchResult.class);
+
+    /**
+     * Reads the result data from the fields of the given lucene document.
+     * For results of type attachment the filename is read as well and the
+     * download url of the attachment is looked up; if that lookup fails with
+     * an {@link XWikiException} the error is logged and the url stays unset.
+     * @todo add fallback for unknown index field names (read values into a map
+     *       accessible from search results page) This would be useful for
+     *       integration of external indexes where the field names dont match
+     *       ours.
+     * @param doc
+     *            lucene document of the hit
+     * @param score
+     *            score of the hit
+     * @param xwiki
+     *            xwiki api object used to look up the document an attachment
+     *            belongs to
+     * @todo TODO: to be more flexible make a factory to construct different
+     *       kinds of searchresults, esp. for external indexes and custom
+     *       implementations of searchresults
+     */
+    public SearchResult (org.apache.lucene.document.Document doc, float score, com.xpn.xwiki.api.XWiki xwiki)
+    {
+        this.score = score;
+        name = doc.get (IndexFields.DOCUMENT_NAME);
+        web = doc.get (IndexFields.DOCUMENT_WEB);
+        wiki = doc.get (IndexFields.DOCUMENT_WIKI);
+        type = doc.get (IndexFields.DOCUMENT_TYPE);
+        author = doc.get (IndexFields.DOCUMENT_AUTHOR);
+        creator = doc.get (IndexFields.DOCUMENT_CREATOR);
+        language = doc.get (IndexFields.DOCUMENT_LANGUAGE);
+        date = IndexFields.stringToDate (doc.get (IndexFields.DOCUMENT_DATE));
+        creationDate = IndexFields.stringToDate (doc.get (IndexFields.DOCUMENT_CREATIONDATE));
+        if (LucenePlugin.DOCTYPE_ATTACHMENT.equals (type))
+        {
+            filename = doc.get (IndexFields.FILENAME);
+            // Full name in the form wiki:web.name
+            final String fullDocName = new StringBuffer (wiki).append (":").append (web).append (".")
+                    .append (name).toString ();
+            try
+            {
+                final Document document = xwiki.getDocument (fullDocName);
+                url = document.getAttachmentURL (filename, "download");
+            } catch (XWikiException e)
+            {
+                // Keep the cause in the log instead of printing it to stderr.
+                LOG.error ("error retrieving url for attachment " + filename + " of document " + fullDocName, e);
+            }
+        }
+    }
+
+    /**
+     * @return Returns the name of the user who last modified the document.
+     */
+    public String getAuthor ()
+    {
+        return author;
+    }
+
+    /**
+     * @return Returns the date of last modification.
+     */
+    public Date getDate ()
+    {
+        return date;
+    }
+
+    /**
+     * @return Returns the filename, only used for Attachments (see
+     *         {@link #getType()})
+     */
+    public String getFilename ()
+    {
+        return filename;
+    }
+
+    /**
+     * @return Returns the name of the document.
+     */
+    public String getName ()
+    {
+        return name;
+    }
+
+    /**
+     * @return Returns the score of this search result as computed by lucene. Is
+     *         a float between zero and 1.
+     */
+    public float getScore ()
+    {
+        return score;
+    }
+
+    /**
+     * @return Returns the type of the document, atm this can be either
+     *         <code>wikipage</code> or <code>attachment</code>.
+     */
+    public String getType ()
+    {
+        return type;
+    }
+
+    /**
+     * @return Returns the url to access the document. Only set by the
+     *         constructor for attachments; otherwise null unless
+     *         {@link #setUrl(String)} was called.
+     */
+    public String getUrl ()
+    {
+        return url;
+    }
+
+    /**
+     * @return Returns the web the document belongs to.
+     */
+    public String getWeb ()
+    {
+        return web;
+    }
+
+    /**
+     * @return the language of the Document, i.e. <code>de</code> or
+     *         <code>en</code>,<code>default</code> if no language was set
+     *         at indexing time.
+     */
+    public String getLanguage ()
+    {
+        return language;
+    }
+
+    /**
+     * @return creationDate of this document
+     */
+    public Date getCreationDate ()
+    {
+        return creationDate;
+    }
+
+    /**
+     * @return Username of the creator of the document
+     */
+    public String getCreator ()
+    {
+        return creator;
+    }
+
+    /**
+     * @param url
+     *            url to be returned by {@link #getUrl()}
+     */
+    public void setUrl (String url)
+    {
+        this.url = url;
+    }
+
+    /**
+     * @return the value of the wiki field of the indexed document
+     */
+    public String getWiki ()
+    {
+        return wiki;
+    }
+
+    /**
+     * @return true when this result points to wiki content (attachment or a
+     *         wiki page)
+     */
+    public boolean isWikiContent ()
+    {
+        return (LucenePlugin.DOCTYPE_WIKIPAGE.equals (type) || LucenePlugin.DOCTYPE_ATTACHMENT.equals (type));
+    }
+}


Property changes on: xwiki/trunk/core/src/main/java/com/xpn/xwiki/plugin/lucene/SearchResult.java
___________________________________________________________________
Name: svn:eol-style
   + native

Added: xwiki/trunk/core/src/main/java/com/xpn/xwiki/plugin/lucene/SearchResults.java
===================================================================
--- xwiki/trunk/core/src/main/java/com/xpn/xwiki/plugin/lucene/SearchResults.java	2006-12-06 21:50:31 UTC (rev 1689)
+++ xwiki/trunk/core/src/main/java/com/xpn/xwiki/plugin/lucene/SearchResults.java	2006-12-06 22:29:02 UTC (rev 1690)
@@ -0,0 +1,205 @@
+/*
+ * 
+ * ===================================================================
+ *
+ * Copyright (c) 2005 Jens Krämer, All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details, published at
+ * http://www.gnu.org/copyleft/gpl.html or in gpl.txt in the
+ * root folder of this distribution.
+ *
+ * Created on 01.02.2005
+ *
+ */
+package com.xpn.xwiki.plugin.lucene;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.log4j.Logger;
+import org.apache.lucene.search.Hits;
+
+import com.xpn.xwiki.api.XWiki;
+
+/**
+ * Container for the results of a search.
+ * <p>
+ * This class handles paging through search results and enforces the xwiki
+ * rights management by only returning search results the user executing the
+ * search is allowed to view.
+ * </p>
+ * <p>
+ * All paging methods use 1-based indexes.
+ * </p>
+ * @author <a href="mailto:jk at jkraemer.net">Jens Krämer </a>
+ */
+public class SearchResults
+{
+    private final XWiki         xwiki;
+    private final Hits          hits;
+    private static final Logger LOG = Logger.getLogger (SearchResults.class);
+
+    /** Filtered results, built on first access by {@link #getRelevantResults()}. */
+    private List                relevantResults;
+
+    /**
+     * @param hits
+     *            Lucene search results
+     * @param xwiki
+     *            xwiki instance for access rights checking
+     */
+    public SearchResults (Hits hits, XWiki xwiki)
+    {
+        this.hits = hits;
+        this.xwiki = xwiki;
+    }
+
+    /**
+     * Builds (on first call) and returns the list of results that point to
+     * wiki content, may be viewed according to
+     * <code>xwiki.checkAccess(page, "view")</code> and still exist. Hits that
+     * cannot be converted into a {@link SearchResult} are logged and skipped.
+     * @return list of {@link SearchResult} instances
+     */
+    private List getRelevantResults ()
+    {
+        if (relevantResults == null)
+        {
+            relevantResults = new ArrayList ();
+            final int hitcount = hits.length ();
+            for (int i = 0; i < hitcount; i++)
+            {
+                try
+                {
+                    final SearchResult result = new SearchResult (hits.doc (i), hits.score (i), xwiki);
+                    if (!result.isWikiContent ())
+                    {
+                        continue;
+                    }
+                    // NOTE(review): the page name does not include the wiki
+                    // of the result - confirm that the checks below resolve
+                    // it against the intended wiki.
+                    final String pageName = result.getWeb () + "." + result.getName ();
+                    if (xwiki.checkAccess (pageName, "view") && xwiki.exists (pageName))
+                    {
+                        relevantResults.add (result);
+                    }
+                } catch (Exception e)
+                {
+                    LOG.error ("error getting search result", e);
+                }
+            }
+        }
+        return relevantResults;
+    }
+
+    /**
+     * @param beginIndex
+     *            1-based index of the first displayed result
+     * @param items
+     *            number of results per page
+     * @return true when there are more results than currently displayed.
+     * @throws NumberFormatException
+     *             if one of the arguments is not a valid integer
+     */
+    public boolean hasNext (String beginIndex, String items)
+    {
+        final int itemCount = Integer.parseInt (items);
+        final int begin = Integer.parseInt (beginIndex);
+        return begin + itemCount - 1 < getRelevantResults ().size ();
+    }
+
+    /**
+     * @param beginIndex
+     *            1-based index of the first displayed result
+     * @return true when there is a page before the one currently displayed,
+     *         that is, when <code>beginIndex > 1</code>
+     * @throws NumberFormatException
+     *             if the argument is not a valid integer
+     */
+    public boolean hasPrevious (String beginIndex)
+    {
+        return Integer.parseInt (beginIndex) > 1;
+    }
+
+    /**
+     * @param beginIndex
+     *            1-based index of the first displayed result
+     * @param items
+     *            number of results per page
+     * @return the value to be used for the firstIndex URL parameter to build a
+     *         link pointing to the next page of results; never less than 1
+     * @throws NumberFormatException
+     *             if one of the arguments is not a valid integer
+     */
+    public int getNextIndex (String beginIndex, String items)
+    {
+        final int itemCount = Integer.parseInt (items);
+        final int resultcount = getRelevantResults ().size ();
+        int retval = Integer.parseInt (beginIndex) + itemCount;
+        if (retval > resultcount)
+        {
+            // Past the end: fall back to the start of the last full window.
+            retval = resultcount - itemCount + 1;
+        }
+        // With fewer results than items per page the fallback would be zero
+        // or negative, which is not a valid 1-based index.
+        return retval < 1 ? 1 : retval;
+    }
+
+    /**
+     * @param beginIndex
+     *            1-based index of the first displayed result
+     * @param items
+     *            number of results per page
+     * @return the value to be used for the firstIndex URL parameter to build a
+     *         link pointing to the previous page of results
+     * @throws NumberFormatException
+     *             if one of the arguments is not a valid integer
+     */
+    public int getPreviousIndex (String beginIndex, String items)
+    {
+        int retval = Integer.parseInt (beginIndex) - Integer.parseInt (items);
+        return 0 < retval ? retval : 1;
+    }
+
+    /**
+     * @param beginIndex
+     *            1-based index of the first displayed result
+     * @param items
+     *            number of results per page
+     * @return the index of the last displayed search result
+     * @throws NumberFormatException
+     *             if one of the arguments is not a valid integer
+     */
+    public int getEndIndex (String beginIndex, String items)
+    {
+        int retval = Integer.parseInt (beginIndex) + Integer.parseInt (items) - 1;
+        final int resultcount = getRelevantResults ().size ();
+        if (retval > resultcount)
+        {
+            return resultcount;
+        }
+        return retval;
+    }
+
+    /**
+     * Helper method for use in velocity templates, takes string values instead
+     * of ints. See {@link #getResults(int, int)}.
+     * @param beginIndex
+     *            1-based index of first result to return
+     * @param items
+     *            number of items to return
+     * @return List of SearchResult instances, see
+     *         {@link #getResults(int, int)}
+     * @throws NumberFormatException
+     *             if one of the arguments is not a valid integer
+     */
+    public List getResults (String beginIndex, String items)
+    {
+        return getResults (Integer.parseInt (beginIndex), Integer.parseInt (items));
+    }
+
+    /**
+     * Returns a list of search results. According to beginIndex and items,
+     * only a subset of the results is returned. To get the first ten results,
+     * one would use beginIndex=1 and items=10.
+     * <p>
+     * Values outside the available range are clamped instead of raising an
+     * exception: a beginIndex below 1 is treated as 1, a beginIndex beyond
+     * the last result or a non-positive number of items yields an empty list.
+     * </p>
+     * @param beginIndex
+     *            1-based index of first result to return.
+     * @param items
+     *            number of items to return
+     * @return List of SearchResult instances starting at
+     *         <code>beginIndex</code> and containing up to
+     *         <code>items</code> elements.
+     */
+    public List getResults (int beginIndex, int items)
+    {
+        final List results = getRelevantResults ();
+        final int resultcount = results.size ();
+        int listStartIndex = beginIndex - 1;
+        if (listStartIndex < 0)
+        {
+            listStartIndex = 0;
+        } else if (listStartIndex > resultcount)
+        {
+            listStartIndex = resultcount;
+        }
+        final int listEndIndex;
+        if (items <= 0)
+        {
+            listEndIndex = listStartIndex;
+        } else if (items > resultcount - listStartIndex)
+        {
+            // Compared this way round so that start + items cannot overflow.
+            listEndIndex = resultcount;
+        } else
+        {
+            listEndIndex = listStartIndex + items;
+        }
+        return results.subList (listStartIndex, listEndIndex);
+    }
+
+    /**
+     * @return all search results in one list.
+     */
+    public List getResults ()
+    {
+        return getRelevantResults ();
+    }
+
+    /**
+     * @return total number of searchresults the user is allowed to view
+     */
+    public int getHitcount ()
+    {
+        return getRelevantResults ().size ();
+    }
+}


Property changes on: xwiki/trunk/core/src/main/java/com/xpn/xwiki/plugin/lucene/SearchResults.java
___________________________________________________________________
Name: svn:eol-style
   + native

Added: xwiki/trunk/core/src/main/java/com/xpn/xwiki/plugin/lucene/TextExtractor.java
===================================================================
--- xwiki/trunk/core/src/main/java/com/xpn/xwiki/plugin/lucene/TextExtractor.java	2006-12-06 21:50:31 UTC (rev 1689)
+++ xwiki/trunk/core/src/main/java/com/xpn/xwiki/plugin/lucene/TextExtractor.java	2006-12-06 22:29:02 UTC (rev 1690)
@@ -0,0 +1,81 @@
+/*
+ * 
+ * ===================================================================
+ *
+ * Copyright (c) 2005 Jens Krämer, All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details, published at
+ * http://www.gnu.org/copyleft/gpl.html or in gpl.txt in the
+ * root folder of this distribution.
+ *
+ * Created on 25.01.2005
+ *
+ */
+
+package com.xpn.xwiki.plugin.lucene;
+
+import java.util.HashMap;
+import java.util.Map;
+
+import org.apache.log4j.Logger;
+import com.xpn.xwiki.plugin.lucene.textextraction.*;
+
+/**
+ * Extraction of text from various binary formats, selected by mime type.
+ * Extraction itself is delegated to the <code>MimetypeTextExtractor</code>
+ * implementations in the <code>textextraction</code> subpackage, several of
+ * which originate from the
+ * <a href="http://new.cocoondev.org/daisy">Daisy project </a>.
+ * @author <a href="mailto:jk at jkraemer.net">Jens Krämer </a>
+ */
+public class TextExtractor
+{
+    private static final Logger LOG            = Logger.getLogger (TextExtractor.class);
+
+    /** Maps a mime type string to the MimetypeTextExtractor handling it. */
+    static final Map            textExtractors = new HashMap ();
+    static
+    {
+        // TODO: make text extractors more pluggable by moving this into a config file.
+        final XmlTextExtractor xmlTextExtractor = new XmlTextExtractor ();
+        textExtractors.put ("application/xhtml+xml", xmlTextExtractor);
+        textExtractors.put ("text/xml", xmlTextExtractor);
+        textExtractors.put ("text/plain", new PlainTextExtractor());
+        textExtractors.put ("application/pdf", new PDFTextExtractor());
+        // OpenOffice extraction is disabled in this revision:
+//        textExtractors.put ("application/vnd.sun.xml.writer", new OpenOfficeTextExtractor ());
+        textExtractors.put ("application/msword", new MSWordTextExtractor ());
+
+        // Register the Office extractors under the historical names used so far
+        // and under the registered vnd.* mime types, so attachments reported
+        // with the official type are indexed as well.
+        final MSPowerPointTextExtractor powerPointTextExtractor = new MSPowerPointTextExtractor ();
+        textExtractors.put ("application/ms-powerpoint", powerPointTextExtractor);
+        textExtractors.put ("application/vnd.ms-powerpoint", powerPointTextExtractor);
+        final MSExcelTextExtractor excelTextExtractor = new MSExcelTextExtractor ();
+        textExtractors.put ("application/ms-excel", excelTextExtractor);
+        textExtractors.put ("application/vnd.ms-excel", excelTextExtractor);
+    }
+
+    /**
+     * Extracts the plain text of the given content using the extractor
+     * registered for its mime type.
+     * @param content
+     *            raw bytes of the document
+     * @param mimetype
+     *            mime type used to look up the extractor (exact match)
+     * @return the extracted text, or <code>null</code> if no extractor is
+     *         registered for the mime type or the extraction failed. Failures
+     *         are logged, never propagated.
+     */
+    public static String getText (byte[] content, String mimetype)
+    {
+        final MimetypeTextExtractor extractor = (MimetypeTextExtractor) textExtractors.get (mimetype);
+        if (extractor == null)
+        {
+            LOG.info ("no text extractor for mimetype " + mimetype);
+            return null;
+        }
+        try
+        {
+            return extractor.getText (content);
+        } catch (Exception e)
+        {
+            // best effort: a broken attachment must not abort the caller.
+            // The logger already records the stack trace.
+            LOG.error ("error getting text for mimetype " + mimetype, e);
+            return null;
+        }
+    }
+
+}


Property changes on: xwiki/trunk/core/src/main/java/com/xpn/xwiki/plugin/lucene/TextExtractor.java
___________________________________________________________________
Name: svn:eol-style
   + native

Added: xwiki/trunk/core/src/main/java/com/xpn/xwiki/plugin/lucene/XWikiDocumentQueue.java
===================================================================
--- xwiki/trunk/core/src/main/java/com/xpn/xwiki/plugin/lucene/XWikiDocumentQueue.java	2006-12-06 21:50:31 UTC (rev 1689)
+++ xwiki/trunk/core/src/main/java/com/xpn/xwiki/plugin/lucene/XWikiDocumentQueue.java	2006-12-06 22:29:02 UTC (rev 1690)
@@ -0,0 +1,68 @@
+/*
+ * 
+ * ===================================================================
+ *
+ * Copyright (c) 2005 Jens Krämer, All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details, published at
+ * http://www.gnu.org/copyleft/gpl.html or in gpl.txt in the
+ * root folder of this distribution.
+ *
+ * Created on 24.01.2005
+ *
+ */
+
+package com.xpn.xwiki.plugin.lucene;
+import java.util.HashMap;
+import java.util.Map;
+
+import org.apache.commons.collections.Buffer;
+import org.apache.commons.collections.buffer.UnboundedFifoBuffer;
+
+import com.xpn.xwiki.doc.XWikiDocument;
+
+/**
+ * FIFO queue of <code>IndexData</code> entries in which every key occurs at
+ * most once. The key of an entry is its <code>toString()</code> value; adding
+ * an entry whose key is already queued replaces the queued entry but keeps its
+ * position. All methods synchronize on the queue instance.
+ * @author <a href="mailto:jk at jkraemer.net">Jens Krämer </a>
+ */
+public class XWikiDocumentQueue
+{
+    /** maps the key of each queued entry to the entry itself */
+    private final Map    documentsByName = new HashMap ();
+    /** maintains fifo order of the keys */
+    private final Buffer namesQueue      = new UnboundedFifoBuffer ();
+
+    /**
+     * Removes and returns the oldest entry. Callers are expected to check
+     * {@link #isEmpty()} first; removal from an empty queue is rejected by the
+     * underlying buffer.
+     * @return the entry stored for the oldest queued key
+     */
+    public synchronized IndexData remove ()
+    {
+        return (IndexData) documentsByName.remove (namesQueue.remove ());
+    }
+
+    /**
+     * Queues the given entry, or replaces the already queued entry that has
+     * the same key.
+     * @param data
+     *            entry to queue; its <code>toString()</code> value is the key
+     */
+    public synchronized void add (IndexData data)
+    {
+        final String key = data.toString ();
+        if (!documentsByName.containsKey (key))
+        {
+            // document with this name not yet in Queue, so add it
+            namesQueue.add (key);
+        }
+        // in any case put new version of this document in the map, overwriting
+        // possibly existing older version
+        documentsByName.put (key, data);
+    }
+
+    /**
+     * @return <code>true</code> if no entry is queued
+     */
+    public synchronized boolean isEmpty ()
+    {
+        return namesQueue.isEmpty ();
+    }
+
+    /**
+     * Synchronized like the other accessors, so the size is never read while
+     * another thread is modifying the buffer.
+     * @return number of queued entries
+     */
+    public synchronized long getSize() {
+        return namesQueue.size();
+    }
+}


Property changes on: xwiki/trunk/core/src/main/java/com/xpn/xwiki/plugin/lucene/XWikiDocumentQueue.java
___________________________________________________________________
Name: svn:eol-style
   + native

Added: xwiki/trunk/core/src/main/java/com/xpn/xwiki/plugin/lucene/textextraction/MSExcelTextExtractor.java
===================================================================
--- xwiki/trunk/core/src/main/java/com/xpn/xwiki/plugin/lucene/textextraction/MSExcelTextExtractor.java	2006-12-06 21:50:31 UTC (rev 1689)
+++ xwiki/trunk/core/src/main/java/com/xpn/xwiki/plugin/lucene/textextraction/MSExcelTextExtractor.java	2006-12-06 22:29:02 UTC (rev 1690)
@@ -0,0 +1,213 @@
+package com.xpn.xwiki.plugin.lucene.textextraction;
+import org.apache.poi.hssf.usermodel.*;
+import org.apache.poi.poifs.filesystem.POIFSFileSystem;
+import org.apache.log4j.Logger;
+
+import java.io.ByteArrayInputStream;
+import java.text.SimpleDateFormat;
+import java.text.DecimalFormat;
+import java.util.Date;
+
+/**
+ * Text extractor for Microsoft Excel workbooks, based on POI HSSF. The text of
+ * all string and numeric cells of all sheets is concatenated, separated by
+ * single spaces.
+ * <p>
+ * No per-document state is kept in fields, because a single instance is
+ * registered in <code>TextExtractor</code> and reused for every document.
+ */
+public class MSExcelTextExtractor implements MimetypeTextExtractor {
+
+    private static final Logger LOG = Logger.getLogger(MSExcelTextExtractor.class);
+
+    /** Initial capacity of the buffer collecting the extracted text. */
+    public static final int DEFAULT_BUFFER_SIZE = 16384; // 16 k
+
+    /** Name of Excel's default cell format; it is not a java.text pattern. */
+    private static final String GENERAL_FORMAT = "General";
+
+    /**
+     * Extracts all text from an Excel workbook by parsing all of its sheets.
+     * @param data raw bytes of the workbook
+     * @return the trimmed cell values, each followed by a single space
+     * @throws Exception if the data cannot be read as a workbook
+     */
+    public String getText(byte[] data) throws Exception {
+        POIFSFileSystem poiFs = new POIFSFileSystem(new ByteArrayInputStream(data));
+        HSSFWorkbook workbook = new HSSFWorkbook(poiFs);
+        HSSFDataFormat dataFormat = workbook.createDataFormat();
+
+        StringBuffer cleanBuffer = new StringBuffer(DEFAULT_BUFFER_SIZE);
+        int sheetCount = workbook.getNumberOfSheets();
+        for (int sheetIdx = 0; sheetIdx < sheetCount; sheetIdx++) {
+            HSSFSheet sheet = workbook.getSheetAt(sheetIdx);
+
+            if (sheet != null) {
+                parseSheet(sheet, dataFormat, cleanBuffer);
+            }
+        }
+        return cleanBuffer.toString();
+    }
+
+    /**
+     * Appends the text of every existing row of the sheet to the buffer.
+     * @param sheet sheet to read
+     * @param dataFormat data formats of the workbook the sheet belongs to
+     * @param cleanBuffer buffer receiving the text
+     */
+    private void parseSheet(HSSFSheet sheet, HSSFDataFormat dataFormat, StringBuffer cleanBuffer) {
+        int firstRow = sheet.getFirstRowNum();
+        int lastRow = sheet.getLastRowNum();
+        for (int rowIdx = firstRow; rowIdx <= lastRow; rowIdx++) {
+            HSSFRow row = sheet.getRow(rowIdx);
+
+            if (row != null) {
+                parseRow(row, dataFormat, cleanBuffer);
+            }
+        }
+    }
+
+    /**
+     * Appends the text of every existing cell of the row to the buffer.
+     * @param row row to read
+     * @param dataFormat data formats of the workbook the row belongs to
+     * @param cleanBuffer buffer receiving the text
+     */
+    private void parseRow(HSSFRow row, HSSFDataFormat dataFormat, StringBuffer cleanBuffer) {
+        int firstCell = row.getFirstCellNum();
+        int lastCell = row.getLastCellNum();
+        if (firstCell < 0) {
+            // negative first index: nothing to read in this row
+            return;
+        }
+        // An int counter is used on purpose: a short counter would wrap around
+        // and never terminate if lastCell were Short.MAX_VALUE. The upper bound
+        // is inclusive; an index without a cell simply yields null below.
+        for (int cellIdx = firstCell; cellIdx <= lastCell; cellIdx++) {
+            HSSFCell cell = row.getCell((short) cellIdx);
+
+            if (cell != null) {
+                parseCell(cell, dataFormat, cleanBuffer);
+            }
+        }
+    }
+
+    /**
+     * Appends the trimmed value of a string or numeric cell, followed by a
+     * space, to the buffer. Cells of other types and empty values are skipped.
+     * @param cell cell to read
+     * @param dataFormat data formats of the workbook the cell belongs to
+     * @param cleanBuffer buffer receiving the text
+     */
+    private void parseCell(HSSFCell cell, HSSFDataFormat dataFormat, StringBuffer cleanBuffer) {
+        String cellValue = null;
+
+        int cellType = cell.getCellType();
+        if (cellType == HSSFCell.CELL_TYPE_STRING) {
+            cellValue = cell.getStringCellValue();
+        } else if (cellType == HSSFCell.CELL_TYPE_NUMERIC) {
+            cellValue = formatNumericCell(cell, dataFormat);
+        }
+
+        if (cellValue != null) {
+            cellValue = cellValue.trim();
+            if (cellValue.length() != 0) {
+                cleanBuffer.append(cellValue);
+                cleanBuffer.append(" ");
+            }
+        }
+    }
+
+    /**
+     * Renders a numeric cell as a date or as a number, following the cell's
+     * Excel format string where java.text can interpret it.
+     */
+    private String formatNumericCell(HSSFCell cell, HSSFDataFormat dataFormat) {
+        short formatId = cell.getCellStyle().getDataFormat();
+        // the format string may be null for unknown format ids
+        String formatPattern = replace(dataFormat.getFormat(formatId), "\\ ", " ");
+        double numberValue = cell.getNumericCellValue();
+
+        if (isCellDateFormatted(cell, dataFormat)) {
+            // This is a date
+            // NOTE(review): only "mmmm" is mapped to the Java month letter; a
+            // shorter Excel "m" is read as minutes by SimpleDateFormat.
+            formatPattern = replace(formatPattern, "mmmm", "MMMM");
+            formatPattern = replace(formatPattern, "/", ".");
+            Date date = HSSFDateUtil.getJavaDate(numberValue);
+            return createDateFormat(formatPattern).format(date);
+        }
+
+        // This is a Number
+        if (formatPattern == null || GENERAL_FORMAT.equalsIgnoreCase(formatPattern)) {
+            // "General" is not a DecimalFormat pattern; its letters would be
+            // taken as literal text and pollute the indexed value.
+            return formatGeneralNumber(numberValue);
+        }
+        return createNumberFormat(formatPattern).format(numberValue);
+    }
+
+    /** Builds a date format for the pattern, falling back to the default format. */
+    private static SimpleDateFormat createDateFormat(String formatPattern) {
+        if (formatPattern != null) {
+            try {
+                return new SimpleDateFormat(formatPattern);
+            } catch (RuntimeException e) {
+                if (LOG.isDebugEnabled()) {
+                    LOG.debug("Creating date format failed: '" + formatPattern + "'", e);
+                }
+            }
+        }
+        return new SimpleDateFormat();
+    }
+
+    /** Builds a number format for the pattern, falling back to the default format. */
+    private static DecimalFormat createNumberFormat(String formatPattern) {
+        try {
+            return new DecimalFormat(formatPattern);
+        } catch (RuntimeException e) {
+            if (LOG.isDebugEnabled()) {
+                LOG.debug("Creating number format failed: '" + formatPattern + "'", e);
+            }
+            return new DecimalFormat();
+        }
+    }
+
+    /** Renders a number without a format: whole numbers without fraction part. */
+    private static String formatGeneralNumber(double value) {
+        if (value == Math.rint(value) && Math.abs(value) < 1e15) {
+            return Long.toString((long) value);
+        }
+        return Double.toString(value);
+    }
+
+    /**
+     * Checks whether a numeric cell should be rendered as a date. Besides
+     * POI's own check, any format string containing one of the letters
+     * d, m, y, h or s (case-insensitive) is treated as a date format.
+     * @param cell numeric cell to check
+     * @param dataFormat data formats of the workbook the cell belongs to
+     * @return <code>true</code> if the cell holds a valid Excel date value and
+     *         looks date formatted
+     */
+    private boolean isCellDateFormatted(HSSFCell cell, HSSFDataFormat dataFormat) {
+        if (!HSSFDateUtil.isValidExcelDate(cell.getNumericCellValue())) {
+            return false;
+        }
+        if (HSSFDateUtil.isCellDateFormatted(cell)) {
+            return true;
+        }
+
+        String fmtText = dataFormat.getFormat(cell.getCellStyle().getDataFormat());
+        if (fmtText == null) {
+            return false;
+        }
+        fmtText = fmtText.toLowerCase();
+        return fmtText.indexOf("d") >= 0
+                || fmtText.indexOf("m") >= 0
+                || fmtText.indexOf("y") >= 0
+                || fmtText.indexOf("h") >= 0
+                || fmtText.indexOf("s") >= 0;
+    }
+
+    /**
+     * Replaces all occurrences of pattern in the source with the replacement.
+     * @param source text to search; may be <code>null</code>
+     * @param pattern literal text to look for (not a regular expression)
+     * @param replacement text inserted for every occurrence
+     * @return the source with all occurrences replaced; the source itself if
+     *         it is <code>null</code>, if the pattern is <code>null</code> or
+     *         empty, or if the pattern does not occur
+     */
+    public static String replace(String source, String pattern, String replacement) {
+        // An empty pattern matches at every position without advancing, so
+        // the loop below would never terminate for it.
+        if (source == null || pattern == null || pattern.length() == 0) {
+            return source;
+        }
+
+        // Check whether the pattern occurs in the source at all
+        int firstPatternPos = source.indexOf(pattern);
+        if (firstPatternPos == -1) {
+            // The pattern does not occur in the source -> return the source
+            return source;
+        }
+
+        // Build a new String where pattern is replaced by the replacement
+        StringBuffer target = new StringBuffer(source.length());
+        int start = 0;             // The start of a part without the pattern
+        int end = firstPatternPos; // The end of a part without the pattern
+        do {
+            target.append(source.substring(start, end));
+            target.append(replacement);
+            start = end + pattern.length();
+        } while ((end = source.indexOf(pattern, start)) != -1);
+        target.append(source.substring(start, source.length()));
+
+        // return the String
+        return target.toString();
+    }
+
+}


Property changes on: xwiki/trunk/core/src/main/java/com/xpn/xwiki/plugin/lucene/textextraction/MSExcelTextExtractor.java
___________________________________________________________________
Name: svn:eol-style
   + native

Added: xwiki/trunk/core/src/main/java/com/xpn/xwiki/plugin/lucene/textextraction/MSPowerPointTextExtractor.java
===================================================================
--- xwiki/trunk/core/src/main/java/com/xpn/xwiki/plugin/lucene/textextraction/MSPowerPointTextExtractor.java	2006-12-06 21:50:31 UTC (rev 1689)
+++ xwiki/trunk/core/src/main/java/com/xpn/xwiki/plugin/lucene/textextraction/MSPowerPointTextExtractor.java	2006-12-06 22:29:02 UTC (rev 1690)
@@ -0,0 +1,23 @@
+/**
+ * Created by IntelliJ IDEA.
+ * User: lokesh
+ * Date: Oct 17, 2006
+ * Time: 3:31:38 PM
+ * To change this template use File | Settings | File Templates.
+ */
+
+package com.xpn.xwiki.plugin.lucene.textextraction;
+import org.apache.poi.hslf.extractor.PowerPointExtractor;
+
+import java.io.ByteArrayInputStream;
+
+/**
+ * Text extractor for Microsoft PowerPoint presentations, delegating to POI's
+ * <code>PowerPointExtractor</code>.
+ */
+public class MSPowerPointTextExtractor implements MimetypeTextExtractor {
+
+    /**
+     * Returns the text POI extracts from the presentation.
+     * @param data raw bytes of the presentation
+     * @return the extracted text
+     * @throws Exception if POI cannot read the data
+     */
+    public String getText(byte[] data) throws Exception {
+        ByteArrayInputStream presentationStream = new ByteArrayInputStream(data);
+        PowerPointExtractor extractor = new PowerPointExtractor(presentationStream);
+        // both flags enabled (presumably slide text and notes text, see POI docs)
+        String presentationText = extractor.getText(true, true);
+        return presentationText;
+    }
+}


Property changes on: xwiki/trunk/core/src/main/java/com/xpn/xwiki/plugin/lucene/textextraction/MSPowerPointTextExtractor.java
___________________________________________________________________
Name: svn:eol-style
   + native

Added: xwiki/trunk/core/src/main/java/com/xpn/xwiki/plugin/lucene/textextraction/MSWordTextExtractor.java
===================================================================
--- xwiki/trunk/core/src/main/java/com/xpn/xwiki/plugin/lucene/textextraction/MSWordTextExtractor.java	2006-12-06 21:50:31 UTC (rev 1689)
+++ xwiki/trunk/core/src/main/java/com/xpn/xwiki/plugin/lucene/textextraction/MSWordTextExtractor.java	2006-12-06 22:29:02 UTC (rev 1690)
@@ -0,0 +1,31 @@
+/*
+ * Copyright 2004 Outerthought bvba and Schaubroeck nv
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.xpn.xwiki.plugin.lucene.textextraction;
+import java.io.ByteArrayInputStream;
+
+import org.apache.poi.hwpf.HWPFDocument;
+import org.apache.poi.hwpf.usermodel.Range;
+
+/**
+ * Text extractor for Microsoft Word documents, delegating to POI's HWPF.
+ */
+public class MSWordTextExtractor implements MimetypeTextExtractor {
+    /**
+     * Returns the text of the document's overall range.
+     * @param data raw bytes of the Word document
+     * @return the text of the range returned by <code>HWPFDocument.getRange()</code>
+     * @throws Exception if POI cannot read the data
+     */
+    public String getText(byte[] data) throws Exception {
+        ByteArrayInputStream wordStream = new ByteArrayInputStream(data);
+        Range wholeDocument = new HWPFDocument(wordStream).getRange();
+        return wholeDocument.text();
+    }
+}


Property changes on: xwiki/trunk/core/src/main/java/com/xpn/xwiki/plugin/lucene/textextraction/MSWordTextExtractor.java
___________________________________________________________________
Name: svn:eol-style
   + native

Added: xwiki/trunk/core/src/main/java/com/xpn/xwiki/plugin/lucene/textextraction/MimetypeTextExtractor.java
===================================================================
--- xwiki/trunk/core/src/main/java/com/xpn/xwiki/plugin/lucene/textextraction/MimetypeTextExtractor.java	2006-12-06 21:50:31 UTC (rev 1689)
+++ xwiki/trunk/core/src/main/java/com/xpn/xwiki/plugin/lucene/textextraction/MimetypeTextExtractor.java	2006-12-06 22:29:02 UTC (rev 1690)
@@ -0,0 +1,27 @@
+/*
+ * Copyright 2004 Outerthought bvba and Schaubroeck nv
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * 
+ * Changelog:
+ * jk at jkraemer.net: changed visibility of getText from package to public 
+ */
+package com.xpn.xwiki.plugin.lucene.textextraction;
+
+/**
+ * A text extractor for a specific mime type. Implementations turn the raw
+ * bytes of one document into plain text.
+ */
+public interface MimetypeTextExtractor {
+    
+    /**
+     * Extracts the text of a document.
+     * @param data raw bytes of the document
+     * @return the extracted text
+     * @throws Exception if the data cannot be parsed by the implementation
+     */
+    public String getText(byte[] data) throws Exception;
+}


Property changes on: xwiki/trunk/core/src/main/java/com/xpn/xwiki/plugin/lucene/textextraction/MimetypeTextExtractor.java
___________________________________________________________________
Name: svn:eol-style
   + native

Added: xwiki/trunk/core/src/main/java/com/xpn/xwiki/plugin/lucene/textextraction/OpenOfficeTextExtractor.java
===================================================================
--- xwiki/trunk/core/src/main/java/com/xpn/xwiki/plugin/lucene/textextraction/OpenOfficeTextExtractor.java	2006-12-06 21:50:31 UTC (rev 1689)
+++ xwiki/trunk/core/src/main/java/com/xpn/xwiki/plugin/lucene/textextraction/OpenOfficeTextExtractor.java	2006-12-06 22:29:02 UTC (rev 1690)
@@ -0,0 +1,103 @@
+/*
+ * Copyright 2004 Outerthought bvba and Schaubroeck nv
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.xpn.xwiki.plugin.lucene.textextraction;
+import java.io.ByteArrayInputStream;
+import java.util.zip.ZipEntry;
+import java.util.zip.ZipInputStream;
+
+import org.xmlpull.mxp1.MXParser;
+import org.xmlpull.v1.XmlPullParser;
+import com.xpn.xwiki.plugin.lucene.textextraction.xmlutil.XmlEncodingDetector;
+
+/**
+ * Extracts all paragraph text from an OpenOffice document. Both the
+ * OpenOffice.org 1.x text namespace and the OpenDocument text namespace are
+ * recognised.
+ */
+public class OpenOfficeTextExtractor implements MimetypeTextExtractor {
+    /** Text namespace used by OpenOffice.org 1.x documents. */
+    private static final String TEXTNAMESPACE="http://openoffice.org/2000/text";
+    /** Text namespace used by OpenDocument documents. */
+    private static final String ODF_TEXTNAMESPACE = "urn:oasis:names:tc:opendocument:xmlns:text:1.0";
+    /** Name of the zip entry holding the document content. */
+    private static final String CONTENT_ENTRY = "content.xml";
+
+    /**
+     * Returns the text of all paragraph elements of the document, each
+     * paragraph preceded by a single space.
+     * @param data raw bytes of the document (a ZIP archive)
+     * @return the extracted text
+     * @throws Exception if the archive has no content.xml entry or the entry
+     *             cannot be read or parsed
+     */
+    public String getText(byte[] data) throws Exception {
+        /*
+         * the byte array we receive here is in fact a ZIP containing the
+         * content.xml, styles.xml,meta.xml and META-INF/manifest.xml files. We
+         * are only interested in the content.xml because that's the file
+         * containing the actual content
+         */
+        byte[] contentXml = readContentXml(data);
+        if (contentXml == null) {
+            throw new Exception("Invalid OpenOffice document format (content.xml not found)");
+        }
+
+        /* We are using this XmlPullParser because it was impossible to work
+         * with a sax parser. The sax parser always wanted to have access to the
+         * openoffice dtd. Even tried to write our own entityresolver to work
+         * around this problem but didnt work out. In order not to pin ourselves
+         * down to a specific sax implementor (where we eg. would be able to 
+         * specify that we explicitly don't want any check at all against a dtd)
+         * we choose not to use sax at all and use a very lightweight type of 
+         * parsing for this specific goal. 
+         */
+        XmlPullParser parser = new MXParser();
+        parser.setFeature(XmlPullParser.FEATURE_PROCESS_NAMESPACES, true);
+        // The encoding has to be detected on the bytes of content.xml itself,
+        // not on the enclosing ZIP archive.
+        parser.setInput(new ByteArrayInputStream(contentXml),
+                        XmlEncodingDetector.detectEncoding(contentXml));
+
+        StringBuffer text = new StringBuffer();
+        boolean inText = false;
+        int eventType = parser.getEventType();
+        while (eventType != XmlPullParser.END_DOCUMENT)
+        {
+            eventType = parser.next();
+            if (eventType == XmlPullParser.START_TAG)
+            {
+                if (isTextParagraph(parser)) {
+                    text.append(' ');
+                    inText = true;
+                }
+            } else if (eventType == XmlPullParser.END_TAG) {
+                if (isTextParagraph(parser)) {
+                    inText = false;
+                }
+            } else if (eventType == XmlPullParser.TEXT) {
+                if (inText) {
+                    text.append(parser.getText());
+                }
+            }
+        }
+
+        return text.toString();
+    }
+
+    /** Tells whether the parser's current tag is a paragraph in a known text namespace. */
+    private static boolean isTextParagraph(XmlPullParser parser) {
+        if (!"p".equals(parser.getName())) {
+            return false;
+        }
+        String namespace = parser.getNamespace();
+        return TEXTNAMESPACE.equals(namespace) || ODF_TEXTNAMESPACE.equals(namespace);
+    }
+
+    /**
+     * Reads the content.xml entry of the archive completely into memory.
+     * @return the bytes of the entry, or <code>null</code> if there is none
+     */
+    private static byte[] readContentXml(byte[] data) throws java.io.IOException {
+        ZipInputStream zis = new ZipInputStream(new ByteArrayInputStream(data));
+        try {
+            ZipEntry ze;
+            while ((ze = zis.getNextEntry()) != null) {
+                if (CONTENT_ENTRY.equals(ze.getName())) {
+                    // read() returns -1 as soon as the end of the entry is reached
+                    java.io.ByteArrayOutputStream out = new java.io.ByteArrayOutputStream();
+                    byte[] chunk = new byte[4096];
+                    int read;
+                    while ((read = zis.read(chunk)) != -1) {
+                        out.write(chunk, 0, read);
+                    }
+                    return out.toByteArray();
+                }
+            }
+            return null;
+        } finally {
+            zis.close();
+        }
+    }
+}


Property changes on: xwiki/trunk/core/src/main/java/com/xpn/xwiki/plugin/lucene/textextraction/OpenOfficeTextExtractor.java
___________________________________________________________________
Name: svn:eol-style
   + native

Added: xwiki/trunk/core/src/main/java/com/xpn/xwiki/plugin/lucene/textextraction/PDFTextExtractor.java
===================================================================
--- xwiki/trunk/core/src/main/java/com/xpn/xwiki/plugin/lucene/textextraction/PDFTextExtractor.java	2006-12-06 21:50:31 UTC (rev 1689)
+++ xwiki/trunk/core/src/main/java/com/xpn/xwiki/plugin/lucene/textextraction/PDFTextExtractor.java	2006-12-06 22:29:02 UTC (rev 1690)
@@ -0,0 +1,43 @@
+/*
+ * Copyright 2004 Outerthought bvba and Schaubroeck nv
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.xpn.xwiki.plugin.lucene.textextraction;
+import org.pdfbox.pdmodel.PDDocument;
+import org.pdfbox.pdfparser.PDFParser;
+import org.pdfbox.util.PDFTextStripper;
+
+import java.io.ByteArrayInputStream;
+import java.io.CharArrayWriter;
+
+/**
+ * Text extractor for PDF documents, delegating to PDFBox.
+ */
+public class PDFTextExtractor implements MimetypeTextExtractor {
+    /**
+     * Parses the data as a PDF document and returns the text written by
+     * <code>PDFTextStripper</code>. A successfully parsed document is always
+     * closed again, also when the text extraction fails.
+     * @param data raw bytes of the PDF document
+     * @return the extracted text
+     * @throws Exception if parsing or text extraction fails
+     */
+    public String getText(byte[] data) throws Exception {
+        PDDocument document = null;
+        try {
+            PDFParser pdfParser = new PDFParser(new ByteArrayInputStream(data));
+            pdfParser.parse();
+            document = pdfParser.getPDDocument();
+
+            CharArrayWriter extracted = new CharArrayWriter();
+            new PDFTextStripper().writeText(document, extracted);
+            return extracted.toString();
+        } finally {
+            if (document != null) {
+                document.close();
+            }
+        }
+    }
+}


Property changes on: xwiki/trunk/core/src/main/java/com/xpn/xwiki/plugin/lucene/textextraction/PDFTextExtractor.java
___________________________________________________________________
Name: svn:eol-style
   + nativ