Monday, May 11, 2020

Solr DIH with pgsql

Configuring Solr DataImportHandler with postgres (Good as of Solr 8.5.1)


  • Copy the database jdbc driver jar to ${solr.install.dir:../../../..}/contrib/dataimporthandler-extras/lib



  • Add the following lines to solrconfig.xml of the core which needs DIH enabled:


// To allow new fields to be added to schema automatically
  <schemaFactory class="ManagedIndexSchemaFactory">
   ​ <bool name="mutable">true</bool>
    ​<str name="managedSchemaResourceName">managed-schema</str>
  ​</schemaFactory>
//Add DIH jars to classpath
<lib dir="${solr.install.dir:../../../..}/contrib/dataimporthandler-extras/lib" regex=".*\.jar" />
        <lib dir="${solr.install.dir:../../../..}/dist/" regex="solr-dataimporthandler-.*\.jar" />
// Provide config file name for data import
<requestHandler name="/dataimport" class="org.apache.solr.handler.dataimport.DataImportHandler">
  <lst name="defaults">
    <str name="config">data-config.xml</str>
  </lst>
</requestHandler>


  • Modify the following section to make sure it adds only single valued fields by default:


  <updateProcessor class="solr.AddSchemaFieldsUpdateProcessorFactory" name="add-schema-fields">
    <lst name="typeMapping">
      <str name="valueClass">java.lang.String</str>
      <str name="fieldType">string</str>
      <lst name="copyField">
        <str name="dest">*_str</str>
        <int name="maxChars">256</int>
      </lst>
      <!-- Use as default mapping instead of defaultFieldType -->
      <bool name="default">true</bool>
    </lst>
    <lst name="typeMapping">
      <str name="valueClass">java.lang.Boolean</str>
      <str name="fieldType">boolean</str>
    </lst>
    <lst name="typeMapping">
      <str name="valueClass">java.util.Date</str>
      <str name="fieldType">pdate</str>
    </lst>
    <lst name="typeMapping">
      <str name="valueClass">java.lang.Long</str>
      <str name="valueClass">java.lang.Integer</str>
      <str name="fieldType">plong</str>
    </lst>
    <lst name="typeMapping">
      <str name="valueClass">java.lang.Number</str>
      <str name="fieldType">pdouble</str>
    </lst>
  </updateProcessor>

 

Update/Create your data-config.xml. Here's a sample:

 <dataConfig>
  <dataSource type="JdbcDataSource" name="db-name" driver="org.postgresql.Driver" url="jdbc:postgresql://server-hostname:5432/db-name" user="uname" password="pword" />
  <document name="layer_features">
    <entity name="name-on-solr-admin-dropdown" query="select 'aa2c4f3f-b600-4ee9-9997-f030b4e1f4de' as layers_id,ST_AsText(z.geom) as geometry, c.* from zip c, florida_zip_simplified z where c.zip=z.&quot;ZIP&quot; and c.countyname=z.&quot;COUNTYNAME&quot;;
" collection="layer_features">
    <field column="countyname" name="countyname"/>
    <field column="poname" name="poname"/>
    <field column="depcode" name="depcode"/>
    <field column="zip" name="zip"/>
    <field column="cases_1" name="casesall"/>
    <field column="geometry" name="geometry"/>
    <field column="layers_id" name="layers_id"/>   
    </entity>
  </document>
</dataConfig>

Note: column is the name from database, name is the field name on solr schema.
  • Restart Solr

No comments:

Post a Comment