Cara menggunakan 2dsphere index mongodb

Recently, I started to investigate the performance of MongoDB with AIS Data. I used a collection with 19m documents with proper field types as described in the definition. I also created a new geoloc field with type: (Point) from coordinates (lon,lat) in this same collection.

The query under investigation is:

db.nari_dynamic.explain('executionStats').aggregate
(
[
  {
      "$match": {
           "geoloc": {
               "$geoWithin": {
                   "$geometry": {
                       "type" : "Polygon" ,
                       "coordinates": [ [ [ -5.00, 45.00 ], [ +0.00, 45.00 ], [ +0.00, 50.00 ], [ -5.00, 50.00 ], [ -5.00, 45.00 ] ] ]
              }}}}
  },

  { "$group": {"_id": "$sourcemmsi", "PointCount": {"$sum" : 1}, "MinDatePoint": {"$min" : {"date": "$t3" }}, "MaxDatePoint": {"$max" : {"date": "$t3" }} }},
  { "$sort": {"_id":1} },
  { "$limit":100 },
  { "$project": {"_id":1, "PointCount":1, "MinDatePoint":1, "MaxDatePoint":1} }
],
{ explain:true}
)

During investigation and testing I found the following:

  1. Without any index: 94s
  2. With geoloc-2dsphere index: 280s

Here are the Execution Stats: Without the Index

{ stages: 
   [ { '$cursor': 
        { queryPlanner: 
           { plannerVersion: 1,
             namespace: 'mscdata.nari_dynamic',
             indexFilterSet: false,
             parsedQuery: 
              { geoloc: 
                 { '$geoWithin': 
                    { '$geometry': 
                       { type: 'Polygon',
                         coordinates: [ [ [ -5, 45 ], [ 0, 45 ], [ 0, 50 ], [ -5, 50 ], [ -5, 45 ] ] ] } } } },
             queryHash: '6E2EAB94',
             planCacheKey: '6E2EAB94',
             winningPlan: 
              { stage: 'PROJECTION_SIMPLE',
                transformBy: { sourcemmsi: 1, t3: 1, _id: 0 },
                inputStage: 
                 { stage: 'COLLSCAN',
                   filter: 
                    { geoloc: 
                       { '$geoWithin': 
                          { '$geometry': 
                             { type: 'Polygon',
                               coordinates: [ [ [ -5, 45 ], [ 0, 45 ], [ 0, 50 ], [ -5, 50 ], [ -5, 45 ] ] ] } } } },
                   direction: 'forward' } },
             rejectedPlans: [] } } },
     { '$group': 
        { _id: '$sourcemmsi',
          PointCount: { '$sum': { '$const': 1 } },
          MinDatePoint: { '$min': { date: '$t3' } },
          MaxDatePoint: { '$max': { date: '$t3' } } } },
     { '$sort': { sortKey: { _id: 1 }, limit: 100 } },
     { '$project': 
        { _id: true,
          PointCount: true,
          MaxDatePoint: true,
          MinDatePoint: true } } ],
  serverInfo: 
   { host: 'ubuntu16',
     port: 27017,
     version: '4.4.1',
     gitVersion: 'ad91a93a5a31e175f5cbf8c69561e788bbc55ce1' },
  ok: 1 }

Here are the Execution Stats: With the Index

{ stages: 
   [ { '$cursor': 
        { queryPlanner: 
           { plannerVersion: 1,
             namespace: 'mscdata.nari_dynamic',
             indexFilterSet: false,
             parsedQuery: 
              { geoloc: 
                 { '$geoWithin': 
                    { '$geometry': 
                       { type: 'Polygon',
                         coordinates: [ [ [ -5, 45 ], [ 0, 45 ], [ 0, 50 ], [ -5, 50 ], [ -5, 45 ] ] ] } } } },
             queryHash: '6E2EAB94',
             planCacheKey: 'F35B194B',
             winningPlan: 
              { stage: 'PROJECTION_SIMPLE',
                transformBy: { sourcemmsi: 1, t3: 1, _id: 0 },
                inputStage: 
                 { stage: 'FETCH',
                   filter: 
                    { geoloc: 
                       { '$geoWithin': 
                          { '$geometry': 
                             { type: 'Polygon',
                               coordinates: [ [ [ -5, 45 ], [ 0, 45 ], [ 0, 50 ], [ -5, 50 ], [ -5, 45 ] ] ] } } } },
                   inputStage: 
                    { stage: 'IXSCAN',
                      keyPattern: { geoloc: '2dsphere' },
                      indexName: 'geoloc-field',
                      isMultiKey: false,
                      multiKeyPaths: { geoloc: [] },
                      isUnique: false,
                      isSparse: false,
                      isPartial: false,
                      indexVersion: 2,
                      direction: 'forward',
                      indexBounds: 
                       { geoloc: 
                          [ '[936748722493063168, 936748722493063168]',
                            '[954763121002545152, 954763121002545152]',
                            '[959266720629915648, 959266720629915648]',
                            '[960392620536758272, 960392620536758272]',
                            '[960674095513468928, 960674095513468928]',
                            '[960744464257646592, 960744464257646592]',
                            '[960762056443691008, 960762056443691008]',
                            '[960766454490202112, 960766454490202112]',
                            '[960767554001829888, 960767554001829888]',
                            '[960767828879736832, 960767828879736832]',
                            '[960767897599213568, 960767897599213568]',
                            '[960767914779082752, 960767914779082752]',
                            '[960767919074050048, 960767919074050048]',
                            '[960767920147791872, 960767920147791872]',
                            '[960767920416227328, 960767920416227328]',
                            '[960767920483336192, 960767920483336192]',
                            '[960767920500113408, 960767920500113408]',
                            '[960767920504307712, 960767920504307712]',
                            '[960767920505356288, 960767920505356288]',
                            '[960767920505618432, 960767920505618432]',
                            '[960767920505683968, 960767920505683968]',
                            '[960767920505683969, 960767920505716735]',
                            '[1345075088707977217, 1345075088708009983]',
                            '[1345075088708009984, 1345075088708009984]',
                            '[1345075088708075520, 1345075088708075520]',
                            '[1345075088708337664, 1345075088708337664]',
                            '[1345075088709386240, 1345075088709386240]',
                            '[1345075088713580544, 1345075088713580544]',
                            '[1345075088730357760, 1345075088730357760]',
                            '[1345075088797466624, 1345075088797466624]',
                            '[1345075089065902080, 1345075089065902080]',
                            '[1345075090139643904, 1345075090139643904]',
                            '[1345075094434611200, 1345075094434611200]',
                            '[1345075111614480384, 1345075111614480384]',
                            '[1345075180333957120, 1345075180333957120]',
                            '[1345075455211864064, 1345075455211864064]',
                            '[1345076554723491840, 1345076554723491840]',
                            '[1345080952770002944, 1345080952770002944]',
                            '[1345098544956047360, 1345098544956047360]',
                            '[1345168913700225024, 1345168913700225024]',
                            '[1345450388676935680, 1345450388676935680]',
                            '[1346576288583778304, 1346576288583778304]',
                            '[1351079888211148800, 1351079888211148800]',
                            '[1369094286720630784, 1369094286720630784]',
                            '[5116089176692883456, 5116089176692883456]',
                            '[5170132372221329408, 5170132372221329408]',
                            '[5179139571476070401, 5179702521429491711]',
                            '[5179702521429491713, 5180265471382913023]',
                            '[5180265471382913024, 5180265471382913024]',
                            '[5183643171103440896, 5183643171103440896]',
                            '[5187020870823968768, 5187020870823968768]',
                            '[5187020870823968769, 5187583820777390079]',
                            '[5187583820777390081, 5188146770730811391]',
                            '[5188146770730811393, 5197153969985552383]',
                            '[5206161169240293376, 5206161169240293376]',
                            '[5218264593238851584, 5218264593238851584]',
                            '[5218264593238851585, 5218405330727206911]',
                            '[5218546068215562240, 5218546068215562240]',
                            '[5218546068215562241, 5219109018168983551]',
                            '[5219671968122404864, 5219671968122404864]',
                            '[5220234918075826177, 5220797868029247487]',
                            '[5220797868029247488, 5220797868029247488]',
                            '[5220938605517602817, 5221079343005958143]',
                            '[5221079343005958144, 5221079343005958144]',
                            '[5260204364768739328, 5260204364768739328]' ] } } } },
             rejectedPlans: [] } } },
     { '$group': 
        { _id: '$sourcemmsi',
          PointCount: { '$sum': { '$const': 1 } },
          MinDatePoint: { '$min': { date: '$t3' } },
          MaxDatePoint: { '$max': { date: '$t3' } } } },
     { '$sort': { sortKey: { _id: 1 }, limit: 100 } },
     { '$project': 
        { _id: true,
          MinDatePoint: true,
          MaxDatePoint: true,
          PointCount: true } } ],
  serverInfo: 
   { host: 'ubuntu16',
     port: 27017,
     version: '4.4.1',
     gitVersion: 'ad91a93a5a31e175f5cbf8c69561e788bbc55ce1' },
  ok: 1 }

Of course, I understand that is more complex as the query has a grouping function, but the idea is that usually, we will get something quicker and not slower with the index unless the index causes a different sorting inside the engine as geoNear does.

Also, there is a complete analysis from MongoDB if how the queries and index improvements have an impact on the queries, but not so much info for geoWithin. MongoDB states that the results are not sorted with GeoWithin, so I don't find the reason for the delay. https://www.mongodb.com/blog/post/geospatial-performance-improvements-in-mongodb-3-2