I am reading data from a SQL Server database/table using jdbc-river currently. As of now I have created an individual type for each of the tables in my database. As the next step in my implementation I would like to use parent/child types so that I can translate the relationship between my SQL tables and store them.
Table1
Col_id| name| prop1|prop2|prop3
child_table1
col_id| table_id| child_prop1|child_prop2|child_prop3
curl -XPUT 'localhost:9200/_river/parent/_meta' -d '{
"type" : "jdbc",
"jdbc" : {
"driver" : "com.mysql.jdbc.Driver",
"url" : "jdbc:mysql://localhost:3306/test",
"user" : "",
"password" : "",
"sql" : "select * from table1",
"index" : "index1",
"type" : "parent"
}
}'
curl -XPUT 'localhost:9200/_river/child/_meta' -d '{
"type" : "jdbc",
"jdbc" : {
"driver" : "com.mysql.jdbc.Driver",
"url" : "jdbc:mysql://localhost:3306/test",
"user" : "",
"password" : "",
"sql" : "select * from child_table1",
"index" : "index1",
"type" : "child"
}
}'
curl -XPOST 'localhost:9200/_river/child/_mapping' -d '{
"child":{
"_parent": {"type": "parent"}
}
}'
I would like to store my data in the following format
{
"id": "1",
"name": "A leading wordsmith",
"prop1": "data",
"prop2": "data",
"prop3": "data",
"child": [
{
"child_prop1": "data",
"child_prop2": "data",
"child_prop3": "data"
},
{
"child_prop1": "data1",
"child_prop2": "data1",
"child_prop3": "data1"
}
]
}
Can anyone comment on how I can use jdbc-river to store my data as parent/child types for the above scenario.
UPDATE Based on feedback following is the updated mapping & meta.
curl -XPOST 'http://localhost:9200/library' -d '{
"settings": {
"number_of_shards": 1,
"number_of_replicas": 0
},
"mappings": {
"person": {
"properties": {
"person_id": {
"type": "integer"
},
"name": {
"type": "string"
}
}
},
"work": {
"_parent": {
"type": "person"
},
"properties": {
"person_id": {
"type": "integer",
"index": "not_analyzed"
},
"name": {
"type": "string"
},
"genre": {
"type": "string"
},
"publisher": {
"type": "string"
}
}
}
}
}'
curl -XPUT localhost:9200/_river/person/_meta -d '{
"type": "jdbc",
"jdbc": {
"driver": "com.microsoft.sqlserver.jdbc.SQLServerDriver",
"url": "jdbc:sqlserver://127.0.0.1:1433;databaseName=blogcontext",
"user": "sa",
"password": "password",
"sql": "select person_id as _id, name from person",
"poll": "30s"
},
"index": {
"index": "library",
"type": "person",
"bulk_size": 500,
"autocommit": true
}
}'
curl -XPUT localhost:9200/_river/work/_meta -d '{
"type": "jdbc",
"jdbc": {
"driver": "com.microsoft.sqlserver.jdbc.SQLServerDriver",
"url": "jdbc:sqlserver://127.0.0.1:1433;databaseName=blogcontext",
"user": "sa",
"password": "password",
"sql": "select person_id as _parent,name,genre,publisher from work",
"poll": "30s"
},
"index": {
"index": "library",
"type": "work",
"bulk_size": 500,
"autocommit": true
}
}'
Log file
[2014-01-14 07:10:35,488][ERROR][OneShotRiverMouth ] bulk [1] error
org.elasticsearch.ElasticSearchIllegalArgumentException: Can't specify parent if no parent field has been configured
at org.elasticsearch.action.index.IndexRequest.process(IndexRequest.java:597)
at org.elasticsearch.action.bulk.TransportBulkAction.executeBulk(TransportBulkAction.java:165)
at org.elasticsearch.action.bulk.TransportBulkAction.doExecute(TransportBulkAction.java:140)
at org.elasticsearch.action.bulk.TransportBulkAction.doExecute(TransportBulkAction.java:63)
at org.elasticsearch.action.support.TransportAction.execute(TransportAction.java:63)
at org.elasticsearch.client.node.NodeClient.execute(NodeClient.java:92)
at org.elasticsearch.client.support.AbstractClient.bulk(AbstractClient.java:149)
at org.elasticsearch.action.bulk.BulkProcessor.execute(BulkProcessor.java:283)
at org.elasticsearch.action.bulk.BulkProcessor.access$400(BulkProcessor.java:46)
at org.elasticsearch.action.bulk.BulkProcessor$Flush.run(BulkProcessor.java:336)
at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:471)
at java.util.concurrent.FutureTask$Sync.innerRunAndReset(FutureTask.java:351)
at java.util.concurrent.FutureTask.runAndReset(FutureTask.java:178)
at java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.access$301(ScheduledThreadPoolExecutor.java:178)
at java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.run(ScheduledThreadPoolExecutor.java:293)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
at java.lang.Thread.run(Thread.java:724)
thanks
Assumed that your tables look like:
table1
table_id| name| prop1|prop2|prop3
child_table1
child_id| table_id| child_prop1|child_prop2|child_prop3
You will need to select your primary row id and name it "_id", and your parent id and name it "_parent".
curl -XPUT 'localhost:9200/_river/parent/_meta' -d '{
"type" : "jdbc",
"jdbc" : {
"driver" : "com.mysql.jdbc.Driver",
"url" : "jdbc:mysql://localhost:3306/test",
"user" : "",
"password" : "",
"sql" : "select table_id as _id, name, prop1, prop2, prop3 from table1",
"index" : "index1",
"type" : "parent"
}
}'
curl -XPUT 'localhost:9200/_river/child/_meta' -d '{
"type" : "jdbc",
"jdbc" : {
"driver" : "com.mysql.jdbc.Driver",
"url" : "jdbc:mysql://localhost:3306/test",
"user" : "",
"password" : "",
"sql" : "select child_id as _id, table_id as _parent, child_prop1, child_prop2, child_prop3 from child_table1",
"index" : "index1",
"type" : "child"
}
}'
And define the mapping parent/child as you did, then it's done. You can use parent/child queries to query the parent/child data now.
UPDATE:
I already used your newest mapping and created a sample database to import data. Everything works fine; I can index parent/child documents without any errors.
I'm using ES 0.9.5, jdbc-river 2.2.2.
If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With