Logo Questions Linux Laravel Mysql Ubuntu Git Menu
 

Apache Jackrabbit JCA 2.7.5 .docx and .xlsx indexing

Tags:

I'm using Apache Jackrabbit JCA 2.7.5. The problem is that .docx and .xlsx files are not indexed.

My steps :

  • Deploy the Jackrabbit JCA as resource adapter on glassfish
  • create a Connector Connection Pool for the resource adapter, specifying ConfigFile=path/to/the/repository.xml and HomeDir=path/to/the (the same path, without repository.xml)
  • create a Connector Resources for the connector pool (the jndi)
  • create web application
  • create a class to get a session from the connector resource (code below)

    import java.io.Serializable;
    import java.net.MalformedURLException;
    import javax.annotation.Resource;
    import javax.ejb.Stateless;
    import javax.jcr.LoginException;
    import javax.jcr.Repository;
    import javax.jcr.RepositoryException;
    import javax.jcr.Session;
    import javax.jcr.SimpleCredentials;
    import javax.naming.InitialContext;
    import javax.naming.NamingException;
    /**
     * Stateless EJB that hands out JCR sessions obtained from the repository
     * bound in JNDI under {@code jndi/jca}.
     */
    @Stateless
    public class OcmRepository implements Serializable {

        /** Repository returned by the most recent JNDI lookup in {@link #getSession}. */
        public Repository repository;
        /** Session returned by the most recent successful login in {@link #getSession}. */
        public Session session;

        public OcmRepository() {
        }

        /**
         * Looks up the repository in JNDI and logs in to its default workspace.
         *
         * @param log user id used to build the credentials
         * @param mdp password used to build the credentials; must not be {@code null}
         * @return the session that was opened (also stored in {@link #session})
         * @throws LoginException if the repository rejects the credentials
         * @throws RepositoryException if the login fails for another reason
         * @throws NamingException if the JNDI lookup fails or the naming context cannot be closed
         * @throws MalformedURLException declared for backward compatibility with existing callers
         */
        public Session getSession(String log, String mdp) throws LoginException, RepositoryException, NamingException, MalformedURLException {
            InitialContext initialContext = new InitialContext();
            try {
                repository = (Repository) initialContext.lookup("jndi/jca");
            } finally {
                // The naming context is only needed for the lookup; release it
                // instead of leaking one context per call.
                initialContext.close();
            }
            // A null workspace name selects the repository's default workspace.
            session = repository.login(new SimpleCredentials(log, mdp.toCharArray()), null);
            return session;
        }
    }
    
  • Create custom filetype

    import javax.jcr.PropertyType;
    import javax.jcr.Session;
    import javax.jcr.nodetype.NodeType;
    import javax.jcr.nodetype.NodeTypeManager;
    import javax.jcr.nodetype.NodeTypeTemplate;
    import javax.jcr.nodetype.PropertyDefinitionTemplate;
    
    /**
     *
     * @author nathan
     */
    public class FileType {
        public static void RegisterFileType(Session session) throws Exception {        
            NodeTypeManager nodeTypeManager = session.getWorkspace().getNodeTypeManager();
    
            NodeTypeTemplate nodeType = nodeTypeManager.createNodeTypeTemplate();
            nodeType.setName("FileType");
            String[] str = {"nt:resource"};        
            nodeType.setDeclaredSuperTypeNames(str);
            nodeType.setMixin(false);
            nodeType.setQueryable(true);
    
    
            PropertyDefinitionTemplate path = nodeTypeManager.createPropertyDefinitionTemplate();
            path.setName("jcr:path");
            path.setRequiredType(PropertyType.PATH);
            path.setQueryOrderable(false);
            path.setFullTextSearchable(false);
            nodeType.getPropertyDefinitionTemplates().add(path);
    
            PropertyDefinitionTemplate nom = nodeTypeManager.createPropertyDefinitionTemplate();
            nom.setName("jcr:nom");
            nom.setRequiredType(PropertyType.STRING);
            nom.setQueryOrderable(true);
            nom.setFullTextSearchable(true);
            nodeType.getPropertyDefinitionTemplates().add(nom);
    
            PropertyDefinitionTemplate description = nodeTypeManager.createPropertyDefinitionTemplate();
            description.setName("jcr:description");
            description.setRequiredType(PropertyType.STRING);
            description.setQueryOrderable(true);
            description.setFullTextSearchable(true);
            nodeType.getPropertyDefinitionTemplates().add(description);
    
            PropertyDefinitionTemplate motsCles = nodeTypeManager.createPropertyDefinitionTemplate();
            motsCles.setName("jcr:motsCles");
            motsCles.setRequiredType(PropertyType.STRING);
            motsCles.setQueryOrderable(true);
            motsCles.setFullTextSearchable(true);
            nodeType.getPropertyDefinitionTemplates().add(motsCles);
    
            PropertyDefinitionTemplate size = nodeTypeManager.createPropertyDefinitionTemplate();
            size.setName("jcr:size");
            size.setRequiredType(PropertyType.STRING);
            size.setQueryOrderable(true);
            size.setFullTextSearchable(false);
            nodeType.getPropertyDefinitionTemplates().add(size);
    
            PropertyDefinitionTemplate users = nodeTypeManager.createPropertyDefinitionTemplate();
            users.setName("jcr:users");
            users.setRequiredType(PropertyType.STRING);
            users.setQueryOrderable(true);
            users.setFullTextSearchable(false);
            nodeType.getPropertyDefinitionTemplates().add(users);
    
            PropertyDefinitionTemplate groupe = nodeTypeManager.createPropertyDefinitionTemplate();
            groupe.setName("jcr:groupe");
            groupe.setRequiredType(PropertyType.STRING);
            groupe.setQueryOrderable(true);
            groupe.setFullTextSearchable(false);
            nodeType.getPropertyDefinitionTemplates().add(groupe);
    
            NodeType newnodetype = nodeTypeManager.registerNodeType(nodeType, true);             
            session.save();        
        }
    
    }
    
  • Create the abstract class for persistence

    import java.util.ArrayList;
    import java.util.List;
    import java.util.Map;
    
    import javax.jcr.Session;
    
    import org.apache.jackrabbit.ocm.query.Filter;
    import org.apache.jackrabbit.ocm.query.impl.FilterImpl;
    import org.apache.jackrabbit.ocm.query.impl.QueryImpl;
    import org.apache.jackrabbit.ocm.query.Query;
    import org.apache.jackrabbit.ocm.query.QueryManager;
    
    import org.apache.jackrabbit.ocm.manager.ObjectContentManager;
    import org.apache.jackrabbit.ocm.manager.impl.ObjectContentManagerImpl;
    
    import org.apache.jackrabbit.ocm.mapper.Mapper;
    import org.apache.jackrabbit.ocm.mapper.impl.annotation.AnnotationMapperImpl;
    
    import org.apache.jackrabbit.ocm.reflection.ReflectionUtils;
    
    
    /**
     * Generic base class for beans that persist and query entities of type
     * {@code T} through a Jackrabbit OCM {@link ObjectContentManager}.
     *
     * <p>All persistence and query methods require an ObjectContentManager,
     * supplied either by the session constructor or by
     * {@link #setOcm(ObjectContentManager)}.
     *
     * @param <T> entity type handled by the bean
     * @author nathan
     */
    public abstract class AbstractBean<T> {

        private Class<T> entityClass;
        private ObjectContentManager ocm;
        private Mapper mapper;

        /**
         * Creates a bean without an ObjectContentManager; call
         * {@link #setOcm(ObjectContentManager)} before using any other method.
         *
         * @param entityClass class of the entities handled by this bean
         */
        public AbstractBean(Class<T> entityClass){
            this.entityClass = entityClass;
        }

        /**
         * Creates a bean whose ObjectContentManager is built on the given
         * session with the default annotation mapper.
         *
         * @param entityClass class of the entities handled by this bean
         * @param session     JCR session the ObjectContentManager works on
         */
        public AbstractBean(Class<T> entityClass,Session session){
            this.entityClass = entityClass;
            // Keep the mapper so getMapper() reports the one actually in use.
            this.mapper = this.getDefaultMapper();
            this.ocm = new ObjectContentManagerImpl(session, this.mapper);
        }

        /**
         * @return the ObjectContentManager of this bean, or {@code null} if none was set
         */
        public ObjectContentManager getOcm() throws Exception{
            return ocm;
        }

        /**
         * Builds a new ObjectContentManager; the bean's own manager is left untouched.
         *
         * @param session JCR session the new manager works on
         * @param map     mapper describing how entities are mapped to repository nodes
         * @return a freshly created ObjectContentManager
         */
        public ObjectContentManager getOcm(Session session, Mapper map) throws Exception{
            return new ObjectContentManagerImpl(session, map);
        }

        public void setOcm(ObjectContentManager ocm) {
            this.ocm = ocm;
        }

        /**
         * Builds the annotation mapper for the application's entity classes and
         * points OCM's reflection utilities at their class loader.
         */
        @SuppressWarnings("rawtypes") // AnnotationMapperImpl takes a raw List<Class>
        private Mapper getDefaultMapper(){
            ReflectionUtils.setClassLoader(com.ged.ocm.entity.Groupe.class.getClassLoader());
            List<Class> classes = new ArrayList<Class>();
            classes.add(com.ged.ocm.entity.Fichier.class);
            classes.add(com.ged.ocm.entity.Dossier.class);
            classes.add(com.ged.ocm.entity.Groupe.class);
            classes.add(com.ged.ocm.entity.SimpleNode.class);
            return new AnnotationMapperImpl(classes);
        }

        public Mapper getMapper() {
            return mapper;
        }

        public void setMapper(Mapper mapper) {
            this.mapper = mapper;
        }

        /** Makes OCM's reflection utilities use the class loader of the given class. */
        public void setLoader(Class classe){
            ReflectionUtils.setClassLoader(classe.getClassLoader());
        }

        /** Inserts the entity and saves. */
        public void create(T entity) {
            ocm.insert(entity);
            ocm.save();
        }

        /** Updates the entity and saves. */
        public void edit(T entity) {
            ocm.update(entity);
            ocm.save();
        }

        /** Removes the entity and saves. */
        public void remove(T entity) {
            ocm.remove(entity);
            ocm.save();
        }

        /** Refreshes the manager (keeping pending changes) and saves. */
        public void refresh(){
            ocm.refresh(true);
            ocm.save();
        }

        /** Copies the object at {@code orgPath} to {@code destPath} and saves. */
        public void copy(String orgPath, String destPath){
            ocm.copy(orgPath, destPath);
            ocm.save();
        }

        /** Moves the object at {@code orgPath} to {@code destPath} and saves. */
        public void move(String orgPath, String destPath){
            ocm.move(orgPath, destPath);
            ocm.save();
        }

        /** Removes the object stored at the given path and saves. */
        public void removeByPath(String path) {
            ocm.remove(path);
            ocm.save();
        }

        /** Removes every entity whose attributes equal all the given values. */
        public void removeAllByEqual(Map<String,String> filters){
            Filter filter = newFilter();
            addEqualTo(filter, filters);
            removeMatching(filter);
        }

        /** Same as {@link #removeAllByEqual(Map)}, restricted to the given scope. */
        public void removeAllByEqual(String nodePath,Map<String,String> filters){
            Filter filter = newFilter(nodePath);
            addEqualTo(filter, filters);
            removeMatching(filter);
        }

        public boolean isPathExist(String path){
            return ocm.objectExists(path);
        }

        /**
         * Loads the object stored at the given path.
         *
         * @return the object, or {@code null} if it could not be loaded for any reason
         */
        @SuppressWarnings("unchecked")
        public T findByPath(String path) {
            try {
                return (T)ocm.getObject(path);
            } catch (Exception ignored) {
                // Best effort by design: callers treat any failure as "not found".
                return null;
            }
        }

        /**
         * @return the first entity whose attributes equal all the given values,
         *         or {@code null} if there is none
         */
        public T findOneByEqual(Map<String,String> filters){
            Filter filter = newFilter();
            addEqualTo(filter, filters);
            return firstOrNull(fetch(filter));
        }

        /** Finds, in the whole repository ({@code "//"}), the entities equal to all given values. */
        public List<T> findAllByEqual(Map<String,String> filters){
            Filter filter = newFilter("//");
            addEqualTo(filter, filters);
            return fetch(filter);
        }

        /** Finds, in the whole repository ({@code "//"}), the entities matching all given LIKE patterns. */
        public List<T> findAllByLike(Map<String,String> filters){
            Filter filter = newFilter("//");
            addLike(filter, filters);
            return fetch(filter);
        }

        /** Same as {@link #findAllByLike(Map)}, restricted to the given scope. */
        public List<T> findAllByLikeScoped(String scope,Map<String,String> filters){
            Filter filter = newFilter(scope);
            addLike(filter, filters);
            return fetch(filter);
        }

        /** Finds, in the whole repository, the entities whose attribute matches any of the values. */
        public List<T> findAllByOrLike(String attr,String[] val){
            Filter filter = newFilter("//");
            filter.addOrFilter(attr, val);
            return fetch(filter);
        }

        /**
         * Scoped variant of {@link #findOneByEqual(Map)}.
         *
         * @return the first match, or {@code null} if there is none (consistent
         *         with the unscoped overload instead of failing on an empty result)
         */
        public T findOneByEqual(String nodePath, Map<String,String> filters){
            Filter filter = newFilter(nodePath);
            addEqualTo(filter, filters);
            return firstOrNull(fetch(filter));
        }

        /** Same as {@link #findAllByEqual(Map)}, restricted to the given scope. */
        public List<T> findAllByEqual(String nodePath, Map<String,String> filters){
            Filter filter = newFilter(nodePath);
            addEqualTo(filter, filters);
            return fetch(filter);
        }

        /** Runs the given JCR-SQL2 statement and returns the mapped results. */
        @SuppressWarnings("unchecked")
        public List<T> findAllByString(String query){
            return (List<T>) ocm.getObjects(query,javax.jcr.query.Query.JCR_SQL2);
        }

        /** Returns every entity of this bean's type found in the given scope. */
        public List<T> findAllByParentPath(String nodePath){
            return fetch(newFilter(nodePath));
        }

        /** Same as {@link #findAllByParentPath(String)}, ordered ascending by the given attribute. */
        @SuppressWarnings("unchecked")
        public List<T> findAllByParentPathOrder(String nodePath, String ordering){
            Query query = ocm.getQueryManager().createQuery(newFilter(nodePath));
            query.addOrderByAscending(ordering);
            return (List<T>) ocm.getObjects(query);
        }

        /** Counts the entities of this bean's type found in the given scope. */
        public int coutChild(String nodePath){
            return fetch(newFilter(nodePath)).size();
        }

        public boolean ifExistByPath(String path){
            return ocm.objectExists(path);
        }

        /**
         * Computes the parent of a slash-separated path by dropping its last
         * segment, e.g. {@code "/a/b/c"} gives {@code "/a/b"}; a path with at
         * most one segment gives the empty string. The text before the first
         * slash is always discarded, so the method is meant for absolute paths.
         */
        public String getParentPath(String path){
            String[] segments = path.split("/");
            StringBuilder parent = new StringBuilder();
            for (int i = 1; i < segments.length - 1; i++) {
                parent.append('/').append(segments[i]);
            }
            return parent.toString();
        }

        /** Creates an unscoped filter on this bean's entity class. */
        private Filter newFilter() {
            return ocm.getQueryManager().createFilter(entityClass);
        }

        /** Creates a filter on this bean's entity class limited to the given scope. */
        private Filter newFilter(String scope) {
            Filter filter = newFilter();
            filter.setScope(scope);
            return filter;
        }

        /** Adds one equality criterion per map entry. */
        private static void addEqualTo(Filter filter, Map<String,String> filters) {
            for (Map.Entry<String,String> entry : filters.entrySet()) {
                filter.addEqualTo(entry.getKey(), entry.getValue());
            }
        }

        /** Adds one LIKE criterion per map entry. */
        private static void addLike(Filter filter, Map<String,String> filters) {
            for (Map.Entry<String,String> entry : filters.entrySet()) {
                filter.addLike(entry.getKey(), entry.getValue());
            }
        }

        /** Runs the query built from the filter and returns the mapped results. */
        @SuppressWarnings("unchecked")
        private List<T> fetch(Filter filter) {
            Query query = ocm.getQueryManager().createQuery(filter);
            return (List<T>) ocm.getObjects(query);
        }

        /** Removes everything matched by the filter and saves. */
        private void removeMatching(Filter filter) {
            Query query = ocm.getQueryManager().createQuery(filter);
            ocm.remove(query);
            ocm.save();
        }

        /** Returns the first element, or {@code null} when the list is null or empty. */
        private T firstOrNull(List<T> results) {
            return (results == null || results.isEmpty()) ? null : results.get(0);
        }
    }
    
  • Create the bean

    import javax.ejb.Stateless;
    import com.ged.ocm.entity.Fichier;
    import java.io.InputStream;
    import java.util.ArrayList;
    import java.util.List;
    import java.util.Map;
    import javax.jcr.Node;
    import javax.jcr.NodeIterator;
    import javax.jcr.Session;
    import javax.jcr.Workspace;
    import javax.jcr.query.QueryResult;
    import javax.jcr.query.qom.FullTextSearch;
    import javax.jcr.query.qom.StaticOperand;
    import org.apache.jackrabbit.ocm.query.Filter;
    import org.apache.jackrabbit.ocm.query.Query;
    import org.apache.jackrabbit.ocm.query.QueryManager;
    
    /**
     * Bean for {@link Fichier} entities, adding full-text search on top of the
     * generic operations inherited from {@code AbstractBean}.
     */
    @Stateless
    public class FichierBean extends AbstractBean<Fichier>{
        public FichierBean() {
            super(Fichier.class);
        }
        public FichierBean(Session session) {
            super(Fichier.class,session);
        }

        /**
         * Full-text search over every {@code FileType} node.
         *
         * @param motCles search term; it is wrapped in {@code *} wildcards
         * @return the files whose content or properties match the term
         */
        @SuppressWarnings("unchecked")
        public List<Fichier> findAllByContains(String motCles) throws Exception {
            String requette = "SELECT * FROM FileType AS Res WHERE CONTAINS (Res.*, '*"+escapeLiteral(motCles)+"*')";
            return (List<Fichier>) this.getOcm().getObjects(requette, javax.jcr.query.Query.JCR_SQL2);
        }

        /**
         * Full-text search restricted to files whose path starts with {@code path}.
         * The path restriction is applied in memory, after the query has run.
         *
         * @param path    path prefix the results must start with
         * @param motCles search term; it is wrapped in {@code *} wildcards
         * @return the matching files, in the order returned by the query
         */
        @SuppressWarnings("unchecked")
        public List<Fichier> findAllByContains(String path,String motCles) throws Exception {
            String requette = "SELECT * FROM FileType AS Res WHERE CONTAINS (Res.*, '*"+escapeLiteral(motCles)+"*') ORDER BY Res.nom";
            List<Fichier> tmp = (List<Fichier>) this.getOcm().getObjects(requette, javax.jcr.query.Query.JCR_SQL2);

            List<Fichier> results = new ArrayList<Fichier>();
            for (Fichier fichier : tmp) {
                if (fichier.getPath().startsWith(path)) {
                    results.add(fichier);
                }
            }
            return results;
        }

        /**
         * Full-text search expressed through the OCM filter API instead of a
         * hand-written JCR-SQL2 statement.
         *
         * @param motCles search term; it is wrapped in {@code *} wildcards
         */
        @SuppressWarnings("unchecked")
        public List<Fichier> fulltextByOCM(String motCles) throws Exception {
            QueryManager queryManager = this.getOcm().getQueryManager();

            Filter filter = queryManager.createFilter(com.ged.ocm.entity.Fichier.class);
            filter.addContains(".", "*"+motCles+"*");

            Query query = queryManager.createQuery(filter);
            return (List<Fichier>) this.getOcm().getObjects(query);
        }

        /**
         * Escapes a value for use inside a single-quoted JCR-SQL2 string literal
         * by doubling every single quote, so the value cannot end the literal
         * early. A {@code null} value is rendered as the text {@code null}, as
         * plain string concatenation would do.
         */
        private static String escapeLiteral(String value) {
            return String.valueOf(value).replace("'", "''");
        }

    }
    

My configuration files :

  • repository.xml

    <?xml version="1.0"?>
    <!DOCTYPE Repository PUBLIC "-//The Apache Software Foundation//DTD Jackrabbit 1.6//EN"
                            "http://jackrabbit.apache.org/dtd/repository-1.6.dtd">
    <Repository>        
    <!--
    <FileSystem class="org.apache.jackrabbit.core.fs.local.LocalFileSystem">
        <param name="path" value="${rep.home}/repository"/>
    </FileSystem>
    -->
    
    <FileSystem class="org.apache.jackrabbit.core.fs.db.DbFileSystem">
        <param name="driver" value="com.mysql.jdbc.jdbc2.optional.MysqlDataSource"/>
        <param name="url" value="jdbc:mysql://:3306/db_ged_mysql" />
        <param name="user" value="root" />
        <param name="password" value="root" />
        <param name="schema" value="mysql"/>
        <param name="schemaObjectPrefix" value="J_R_FS_"/>
    </FileSystem>
    
    <!--
        security configuration
    -->
    <Security appName="Jackrabbit">
        <AccessManager class="org.apache.jackrabbit.core.security.SimpleAccessManager" />
        <LoginModule class="org.apache.jackrabbit.core.security.SimpleLoginModule">
            <param name="anonymousId" value="anonymous" />
        </LoginModule>
    </Security>
    
    <!--
        location of workspaces root directory and name of default workspace
    -->
    <Workspaces rootPath="${rep.home}/workspaces" defaultWorkspace="default"/>
    <!--
        workspace configuration template:
        used to create the initial workspace if there's no workspace yet
    -->
    <Workspace name="${wsp.name}">
    
        <PersistenceManager class="org.apache.jackrabbit.core.state.db.SimpleDbPersistenceManager">
            <param name="driver" value="com.mysql.jdbc.jdbc2.optional.MysqlDataSource"/>
            <param name="url" value="jdbc:mysql://:3306/db_ged_mysql" />
            <param name="user" value="root" />
            <param name="password" value="root" />
            <param name="schema" value="mysql" />
            <param name="schemaObjectPrefix" value="J_PM_${wsp.name}_" />
            <param name="externalBLOBs" value="false" />
        </PersistenceManager>
        <FileSystem class="org.apache.jackrabbit.core.fs.db.DbFileSystem">
            <param name="driver" value="com.mysql.jdbc.jdbc2.optional.MysqlDataSource"/>
            <param name="url" value="jdbc:mysql://:3306/db_ged_mysql" />
            <param name="user" value="root" />
            <param name="password" value="root" />
            <param name="schema" value="mysql"/>
            <param name="schemaObjectPrefix" value="J_FS_${wsp.name}_"/>
        </FileSystem>
    
        <!--
            Search index and the file system it uses.
            class: FQN of class implementing the QueryHandler interface
        -->
        <SearchIndex class="org.apache.jackrabbit.core.query.lucene.SearchIndex">
            <param name="path" value="${rep.home}/workspaces/${wsp.name}/index"/>
            <param name="tikaConfigPath" value="${rep.home}/tika-config.xml"/>
            <param name="useCompoundFile" value="true"/>
            <param name="minMergeDocs" value="100"/>
            <param name="volatileIdleTime" value="3"/>
            <param name="maxMergeDocs" value="2147483647"/>
            <param name="mergeFactor" value="10"/>
            <param name="maxFieldLength" value="10000"/>
            <param name="bufferSize" value="10"/>
            <param name="cacheSize" value="1000"/>
            <param name="forceConsistencyCheck" value="false"/>
            <param name="enableConsistencyCheck" value="false"/>
            <param name="autoRepair" value="true"/>
            <param name="analyzer" value="org.apache.lucene.analysis.standard.StandardAnalyzer"/>
            <param name="queryClass" value="org.apache.jackrabbit.core.query.QueryImpl"/>
            <param name="respectDocumentOrder" value="true"/>
            <param name="resultFetchSize" value="2147483647"/>
            <param name="extractorPoolSize" value="0"/>
            <param name="extractorTimeout" value="100"/>
            <param name="extractorBackLogSize" value="100"/>
            <param name="supportHighlighting" value="true"/>
            <param name="excerptProviderClass" value="org.apache.jackrabbit.core.query.lucene.DefaultXMLExcerpt"/>
        </SearchIndex>
    </Workspace>
    
    <!--
        Configures the versioning
    -->
    <Versioning rootPath="${rep.home}/version">
        <FileSystem class="org.apache.jackrabbit.core.fs.db.DbFileSystem">
            <param name="driver" value="com.mysql.jdbc.jdbc2.optional.MysqlDataSource"/>
            <param name="url" value="jdbc:mysql://:3306/db_ged_mysql" />
            <param name="user" value="root" />
            <param name="password" value="root" />
            <param name="schema" value="mysql"/>
            <param name="schemaObjectPrefix" value="J_V_FS_"/>
        </FileSystem>
        <PersistenceManager class="org.apache.jackrabbit.core.state.db.SimpleDbPersistenceManager">
            <param name="driver" value="com.mysql.jdbc.jdbc2.optional.MysqlDataSource"/>
            <param name="url" value="jdbc:mysql://:3306/db_ged_mysql" />
            <param name="user" value="root" />
            <param name="password" value="root" />
            <param name="schema" value="mysql" />
            <param name="schemaObjectPrefix" value="J_V_PM_" />
            <param name="externalBLOBs" value="false" />
        </PersistenceManager>
    </Versioning>
    
    <!--
        Search index for content that is shared repository wide
        (/jcr:system tree, contains mainly versions)
    
    <SearchIndex class="org.apache.jackrabbit.core.query.lucene.SearchIndex">
        <param name="path" value="${rep.home}/repository/index"/>
        <param name="extractorPoolSize" value="2"/>
        <param name="supportHighlighting" value="true"/>
    </SearchIndex>
    -->
    
    <!--
        Cluster configuration with system variables.
    
    -->
    
    <RepositoryLockMechanism class="org.apache.jackrabbit.core.util.CooperativeFileLock" />
    
    </Repository>
    
  • tika-config.xml

    <?xml version="1.0" encoding="UTF-8"?>
    <properties>
    
    <mimeTypeRepository resource="/org/apache/tika/mime/tika-mimetypes.xml" magic="false"/>
    
    <parsers>
    
    <parser name="parse-dcxml" class="org.apache.tika.parser.xml.DcXMLParser">
      <mime>application/xml</mime>
      <mime>image/svg+xml</mime>
    </parser>
    
    <parser name="parse-office" class="org.apache.tika.parser.microsoft.OfficeParser">
      <mime>application/x-tika-msoffice</mime>
      <mime>application/msword</mime>
      <mime>application/vnd.ms-excel</mime>
      <mime>application/vnd.ms-excel.sheet.binary.macroenabled.12</mime>
      <mime>application/vnd.ms-powerpoint</mime>
      <mime>application/vnd.visio</mime>
      <mime>application/vnd.ms-outlook</mime>
    </parser>
    
    <parser name="parse-ooxml" class="org.apache.tika.parser.microsoft.ooxml.OOXMLParser">
      <mime>application/x-tika-ooxml</mime>
      <mime>application/vnd.openxmlformats-package.core-properties+xml</mime>
      <mime>application/vnd.openxmlformats-officedocument.spreadsheetml.sheet</mime>
      <mime>application/vnd.openxmlformats-officedocument.spreadsheetml.template</mime>
      <mime>application/vnd.ms-excel.sheet.macroenabled.12</mime>
      <mime>application/vnd.ms-excel.template.macroenabled.12</mime>
      <mime>application/vnd.ms-excel.addin.macroenabled.12</mime>
      <mime>application/vnd.openxmlformats-officedocument.presentationml.presentation</mime>
      <mime>application/vnd.openxmlformats-officedocument.presentationml.template</mime>
      <mime>application/vnd.openxmlformats-officedocument.presentationml.slideshow</mime>
      <mime>application/vnd.ms-powerpoint.presentation.macroenabled.12</mime>
      <mime>application/vnd.ms-powerpoint.slideshow.macroenabled.12</mime>
      <mime>application/vnd.ms-powerpoint.addin.macroenabled.12</mime>
      <mime>application/vnd.openxmlformats-officedocument.wordprocessingml.document</mime>
      <mime>application/vnd.openxmlformats-officedocument.wordprocessingml.template</mime>
      <mime>application/vnd.ms-word.document.macroenabled.12</mime>
      <mime>application/vnd.ms-word.template.macroenabled.12</mime>
    </parser>
    
    <parser name="parse-html" class="org.apache.tika.parser.html.HtmlParser">
      <mime>text/html</mime>
      <mime>application/xhtml+xml</mime>
      <mime>application/x-asp</mime>
    </parser>
    
    <parser name="parse-rtf" class="org.apache.tika.parser.rtf.RTFParser">
      <mime>application/rtf</mime>
    </parser>
    
    <parser name="parse-pdf" class="org.apache.tika.parser.pdf.PDFParser">
      <mime>application/pdf</mime>
    </parser>
    
    <parser name="parse-txt" class="org.apache.tika.parser.txt.TXTParser">
      <mime>text/plain</mime>
    </parser>
    
    <parser name="parse-openoffice" class="org.apache.tika.parser.opendocument.OpenOfficeParser">
      <mime>application/vnd.sun.xml.writer</mime>
      <mime>application/vnd.oasis.opendocument.text</mime>
      <mime>application/vnd.oasis.opendocument.graphics</mime>
      <mime>application/vnd.oasis.opendocument.presentation</mime>
      <mime>application/vnd.oasis.opendocument.spreadsheet</mime>
      <mime>application/vnd.oasis.opendocument.chart</mime>
      <mime>application/vnd.oasis.opendocument.image</mime>
      <mime>application/vnd.oasis.opendocument.formula</mime>
      <mime>application/vnd.oasis.opendocument.text-master</mime>
      <mime>application/vnd.oasis.opendocument.text-web</mime>
      <mime>application/vnd.oasis.opendocument.text-template</mime>
      <mime>application/vnd.oasis.opendocument.graphics-template</mime>
      <mime>application/vnd.oasis.opendocument.presentation-template</mime>
      <mime>application/vnd.oasis.opendocument.spreadsheet-template</mime>
      <mime>application/vnd.oasis.opendocument.chart-template</mime>
      <mime>application/vnd.oasis.opendocument.image-template</mime>
      <mime>application/vnd.oasis.opendocument.formula-template</mime>
      <mime>application/x-vnd.oasis.opendocument.text</mime>
      <mime>application/x-vnd.oasis.opendocument.graphics</mime>
      <mime>application/x-vnd.oasis.opendocument.presentation</mime>
      <mime>application/x-vnd.oasis.opendocument.spreadsheet</mime>
      <mime>application/x-vnd.oasis.opendocument.chart</mime>
      <mime>application/x-vnd.oasis.opendocument.image</mime>
      <mime>application/x-vnd.oasis.opendocument.formula</mime>
      <mime>application/x-vnd.oasis.opendocument.text-master</mime>
      <mime>application/x-vnd.oasis.opendocument.text-web</mime>
      <mime>application/x-vnd.oasis.opendocument.text-template</mime>
      <mime>application/x-vnd.oasis.opendocument.graphics-template</mime>
      <mime>application/x-vnd.oasis.opendocument.presentation-template</mime>
      <mime>application/x-vnd.oasis.opendocument.spreadsheet-template</mime>
      <mime>application/x-vnd.oasis.opendocument.chart-template</mime>
      <mime>application/x-vnd.oasis.opendocument.image-template</mime>
      <mime>application/x-vnd.oasis.opendocument.formula-template</mime>
    </parser>
    
    <parser name="parse-image" class="org.apache.tika.parser.image.ImageParser">
      <mime>image/bmp</mime>
      <mime>image/gif</mime>
      <mime>image/jpeg</mime>
      <mime>image/png</mime>
      <mime>image/tiff</mime>
      <mime>image/vnd.wap.wbmp</mime>
      <mime>image/x-icon</mime>
      <mime>image/x-psd</mime>
      <mime>image/x-xcf</mime>
    </parser>
    
    <parser name="parse-class" class="org.apache.tika.parser.asm.ClassParser">
      <mime>application/x-tika-java-class</mime>
    </parser>
    
    <parser name="parse-mp3" class="org.apache.tika.parser.mp3.Mp3Parser">
      <mime>audio/mpeg</mime>
    </parser>
    
    <parser name="parse-midi" class="org.apache.tika.parser.audio.MidiParser">
      <mime>application/x-midi</mime>
      <mime>audio/midi</mime>
    </parser>
    
    <parser name="parse-audio" class="org.apache.tika.parser.audio.AudioParser">
      <mime>audio/basic</mime>
      <mime>audio/x-wav</mime>
      <mime>audio/x-aiff</mime>
    </parser>
    
    </parsers>
    
    </properties>
    

All queries from the bean work, except when I call the function public List<Fichier> findAllByContains(String path,String motCles) to run a full-text search in .docx and .xlsx documents. Full-text search on .pdf, .txt, .xml, .xls, .doc, ... works perfectly.

like image 280
Aroniaina Avatar asked Oct 12 '15 14:10

Aroniaina


1 Answers

Ref: http://jackrabbit.510166.n4.nabble.com/Office-2007-documents-not-being-indexed-in-Jackrabbit-2-4-3-td4657380.html

Along the same lines, I have observed that commons-compress-1.5.jar is required by the Tika parser for OOXML documents (i.e. Office 2007 documents).

Now, I am able to index & search most of types of documents (office 2007 - docx, pptx, xlsx , office 2003 - doc, ppt, xls, PDF) using below 2 steps:

(1) Updated repository.xml & added [the added configuration snippet is missing from this copy]. Further details can be found at https://issues.apache.org/jira/browse/JCR-3287

(2) Added commons-compress-1.5.jar to the classpath while running jackrabbit-standalone-2.6.2.jar

like image 172
Ashok Goli Avatar answered Dec 05 '22 00:12

Ashok Goli