* You can also use \s in a regex.
It really depends on which characters you want removed, but I'm sure your
implementation isn't correct.
Thanks
-Vincent
On Nov 10, 2008, at 1:09 PM, asiri (SVN) wrote:
Author: asiri
Date: 2008-11-10 13:09:35 +0100 (Mon, 10 Nov 2008)
New Revision: 14068
Added:
sandbox/xwiki-plugin-officeimporter/src/main/java/com/xpn/xwiki/
plugin/officeimporter/filter/HtmlTableFilter.java
sandbox/xwiki-plugin-officeimporter/src/main/java/com/xpn/xwiki/
plugin/officeimporter/utils/ImporterUtils.java
Modified:
sandbox/xwiki-plugin-officeimporter/src/main/java/com/xpn/xwiki/
plugin/officeimporter/filter/HtmlListFilter.java
sandbox/xwiki-plugin-officeimporter/src/main/java/com/xpn/xwiki/
plugin/officeimporter/transformer/HtmlToXWikiTwoZeroTransformer.java
sandbox/xwiki-plugin-officeimporter/src/main/java/com/xpn/xwiki/
plugin/officeimporter/transformer/HtmlToXWikiXhtmlTransformer.java
Log:
XAOFFICE-1 : Develop the initial feature set for office-importer
plugin.
* Completing list support and table support; still need to test and make
a few more adjustments.
Modified: sandbox/xwiki-plugin-officeimporter/src/main/java/com/xpn/
xwiki/plugin/officeimporter/filter/HtmlListFilter.java
===================================================================
--- sandbox/xwiki-plugin-officeimporter/src/main/java/com/xpn/xwiki/
plugin/officeimporter/filter/HtmlListFilter.java 2008-11-10 11:32:32
UTC (rev 14067)
+++ sandbox/xwiki-plugin-officeimporter/src/main/java/com/xpn/xwiki/
plugin/officeimporter/filter/HtmlListFilter.java 2008-11-10 12:09:35
UTC (rev 14068)
@@ -5,11 +5,15 @@
import org.w3c.dom.NodeList;
import com.xpn.xwiki.plugin.officeimporter.utils.ImporterContext;
+import com.xpn.xwiki.plugin.officeimporter.utils.ImporterUtils;
/**
- * This is a temporary filter until we have a clear solution to the
issue at
- *
http://jira.xwiki.org/jira/browse/XWIKI-2812. The problem occurs
when the first child of a
- * {@code <li>} tag is a {@code <p>} tag.
+ * Presently xwiki rendering module doesn't support complex list
items. Because of this reason this
+ * temporary filter is used to rip off any complex formatting
elements present in html lists. The
+ * JIRA issue is located at
http://jira.xwiki.org/jira/browse/XWIKI-2812
.
+ *
+ * @version $Id$
+ * @since 1.7M1
*/
public class HtmlListFilter implements HtmlFilter
{
@@ -21,13 +25,23 @@
NodeList listItems = document.getElementsByTagName("li");
for (int i = 0; i < listItems.getLength(); i++) {
Node listItem = listItems.item(i);
- Node firstChild = listItem.getFirstChild();
- if (firstChild != null &&
firstChild.getNodeName().equals("p")) {
- NodeList grandChildren = firstChild.getChildNodes();
- while (grandChildren.getLength() > 0) {
- listItem.insertBefore(grandChildren.item(0),
firstChild);
+ Node counter = listItem.getFirstChild();
+ while (counter != null) {
+ if (counter.getNodeType() == Node.TEXT_NODE) {
+ String trimmed =
ImporterUtils.leftTrim(counter.getTextContent());
+ counter.setTextContent(trimmed);
+ if (trimmed.equals("")) {
+ counter = counter.getNextSibling();
+ continue;
+ }
+ } else if (counter.getNodeName().equals("p")) {
+ NodeList children = counter.getChildNodes();
+ while (children.getLength() > 0) {
+ listItem.insertBefore(children.item(0),
counter);
+ }
+ listItem.removeChild(counter);
}
- listItem.removeChild(firstChild);
+ break;
}
}
}
Added: sandbox/xwiki-plugin-officeimporter/src/main/java/com/xpn/
xwiki/plugin/officeimporter/filter/HtmlTableFilter.java
===================================================================
--- sandbox/xwiki-plugin-officeimporter/src/main/java/com/xpn/xwiki/
plugin/officeimporter/filter/
HtmlTableFilter.java (rev 0)
+++ sandbox/xwiki-plugin-officeimporter/src/main/java/com/xpn/xwiki/
plugin/officeimporter/filter/HtmlTableFilter.java 2008-11-10
12:09:35 UTC (rev 14068)
@@ -0,0 +1,65 @@
+package com.xpn.xwiki.plugin.officeimporter.filter;
+
+import org.w3c.dom.Document;
+import org.w3c.dom.Node;
+import org.w3c.dom.NodeList;
+
+import com.xpn.xwiki.plugin.officeimporter.utils.ImporterContext;
+import com.xpn.xwiki.plugin.officeimporter.utils.ImporterUtils;
+
+/**
+ * Presently xwiki rendering module doesn't support complex table
cell items. This filter is used to
+ * rip-off or modify html tables so that they can be rendered
properly. The corresponding JIRA issue
+ * is located at
http://jira.xwiki.org/jira/browse/XWIKI-2804.
+ *
+ * @version $Id$
+ * @since 1.7M1
+ */
+public class HtmlTableFilter implements HtmlFilter
+{
+ /**
+ * {@inheritDoc}
+ */
+ public void filter(Document document, ImporterContext context)
+ {
+ NodeList cellItems = document.getElementsByTagName("td");
+ for (int i = 0; i < cellItems.getLength(); i++) {
+ Node cellItem = cellItems.item(i);
+ Node counter = cellItem.getFirstChild();
+ while (counter != null) {
+ if (counter.getNodeType() == Node.TEXT_NODE) {
+ String trimmed =
ImporterUtils.leftTrim(counter.getTextContent());
+ counter.setTextContent(trimmed);
+ if (trimmed.equals("")) {
+ counter = counter.getNextSibling();
+ continue;
+ }
+ } else if (counter.getNodeName().equals("p")) {
+ NodeList children = counter.getChildNodes();
+ while (children.getLength() > 0) {
+ cellItem.insertBefore(children.item(0),
counter);
+ }
+ cellItem.removeChild(counter);
+ }
+ break;
+ }
+ counter = cellItem.getLastChild();
+ while (counter != null) {
+ if (counter.getNodeType() == Node.TEXT_NODE) {
+ String trimmed =
ImporterUtils.rightTrim(counter.getTextContent());
+ counter.setTextContent(trimmed);
+ if (trimmed.equals("")) {
+ counter = counter.getPreviousSibling();
+ continue;
+ }
+ }
+ break;
+ }
+ // Fill all empty cells with an empty character
(space / tab).
+ // This is because the current xwiki 2.0 syntax doesn't
handle empty cells correctly.
+ if (cellItem.getTextContent().equals("")) {
+ cellItem.setTextContent("-");
+ }
+ }
+ }
+}
Modified: sandbox/xwiki-plugin-officeimporter/src/main/java/com/xpn/
xwiki/plugin/officeimporter/transformer/
HtmlToXWikiTwoZeroTransformer.java
===================================================================
--- sandbox/xwiki-plugin-officeimporter/src/main/java/com/xpn/xwiki/
plugin/officeimporter/transformer/HtmlToXWikiTwoZeroTransformer.java
2008-11-10 11:32:32 UTC (rev 14067)
+++ sandbox/xwiki-plugin-officeimporter/src/main/java/com/xpn/xwiki/
plugin/officeimporter/transformer/HtmlToXWikiTwoZeroTransformer.java
2008-11-10 12:09:35 UTC (rev 14068)
@@ -23,6 +23,7 @@
import com.xpn.xwiki.plugin.officeimporter.filter.HtmlLinkFilter;
import com.xpn.xwiki.plugin.officeimporter.filter.HtmlListFilter;
import com.xpn.xwiki.plugin.officeimporter.filter.HtmlStyleFilter;
+import com.xpn.xwiki.plugin.officeimporter.filter.HtmlTableFilter;
import com.xpn.xwiki.plugin.officeimporter.filter.ImageTagFilter;
import com.xpn.xwiki.plugin.officeimporter.filter.TagRemoveFilter;
import com.xpn.xwiki.plugin.officeimporter.utils.ImporterContext;
@@ -50,6 +51,7 @@
filterList.add(new ImageTagFilter());
filterList.add(new HtmlLinkFilter());
filterList.add(new HtmlListFilter());
+ filterList.add(new HtmlTableFilter());
}
/**
Modified: sandbox/xwiki-plugin-officeimporter/src/main/java/com/xpn/
xwiki/plugin/officeimporter/transformer/
HtmlToXWikiXhtmlTransformer.java
===================================================================
--- sandbox/xwiki-plugin-officeimporter/src/main/java/com/xpn/xwiki/
plugin/officeimporter/transformer/HtmlToXWikiXhtmlTransformer.java
2008-11-10 11:32:32 UTC (rev 14067)
+++ sandbox/xwiki-plugin-officeimporter/src/main/java/com/xpn/xwiki/
plugin/officeimporter/transformer/HtmlToXWikiXhtmlTransformer.java
2008-11-10 12:09:35 UTC (rev 14068)
@@ -13,6 +13,7 @@
import com.xpn.xwiki.plugin.officeimporter.filter.HtmlLinkFilter;
import com.xpn.xwiki.plugin.officeimporter.filter.HtmlListFilter;
import com.xpn.xwiki.plugin.officeimporter.filter.HtmlStyleFilter;
+import com.xpn.xwiki.plugin.officeimporter.filter.HtmlTableFilter;
import com.xpn.xwiki.plugin.officeimporter.filter.ImageTagFilter;
import com.xpn.xwiki.plugin.officeimporter.filter.TagRemoveFilter;
import com.xpn.xwiki.plugin.officeimporter.utils.ImporterContext;
@@ -41,6 +42,7 @@
filterList.add(new ImageTagFilter());
filterList.add(new HtmlLinkFilter());
filterList.add(new HtmlListFilter());
+ filterList.add(new HtmlTableFilter());
}
/**
Added: sandbox/xwiki-plugin-officeimporter/src/main/java/com/xpn/
xwiki/plugin/officeimporter/utils/ImporterUtils.java
===================================================================
--- sandbox/xwiki-plugin-officeimporter/src/main/java/com/xpn/xwiki/
plugin/officeimporter/utils/
ImporterUtils.java (rev 0)
+++ sandbox/xwiki-plugin-officeimporter/src/main/java/com/xpn/xwiki/
plugin/officeimporter/utils/ImporterUtils.java 2008-11-10 12:09:35
UTC (rev 14068)
@@ -0,0 +1,36 @@
+package com.xpn.xwiki.plugin.officeimporter.utils;
+
+/**
+ * Utility methods common for office importer.
+ *
+ * @version $Id$
+ * @since 1.7M1
+ */
+public abstract class ImporterUtils
+{
+ public static String leftTrim(String s)
+ {
+ String content = s.trim();
+ if (content.equals("")) {
+ return "";
+ } else {
+ int index = s.indexOf(content);
+ if(index == 0) {
+ return s;
+ } else {
+ return s.substring(index);
+ }
+ }
+ }
+
+ public static String rightTrim(String s)
+ {
+ String content = s.trim();
+ if (content.equals("")) {
+ return "";
+ } else {
+ int index = s.indexOf(content);
+ return s.substring(0, index + content.length());
+ }
+ }
+}