Chapter 3. BerlinMOD Benchmark on MobilityDB

Table of Contents

Loading the Data
Loading the Data in Partitioned Tables
BerlinMOD/r Queries

BerlinMOD is a standard benchmark for moving object DBMSs. It provides a data generator, pregenerated benchmark data for different scale factors, and set of queries of two types: 17 range-style queries (called BerlinMOD/r), and 9 nearest-neighbours queries (called BerlinMOD/NN). The MobilityDB tutorial presented in Chapter 1, MobilityDB Tutorial and its associated data were based on BerlinMOD. However, its purpose was to show the capabilities of MobilityDB. In this chapter, we show how to load pregenerated BerlinMOD data on MobilityDB and how to express the 17 queries in BerlinMOD/r. Some of these queries were already presented in Chapter 1, MobilityDB Tutorial.

Loading the Data

The script for loading pregenerated data is available here.

-------------------------------------------------------------------------------
-- Loads the BerlinMOD data in projected (2D) coordinates with SRID 5676
-- https://epsg.io/5676
-------------------------------------------------------------------------------

DROP FUNCTION IF EXISTS berlinmod_load();
CREATE OR REPLACE FUNCTION berlinmod_load(scale_factor text DEFAULT '0.005',
  path text DEFAULT '/usr/local/BerlinMOD/') 
RETURNS text AS $$
DECLARE
  fullpath text;
BEGIN
  fullpath = path || scale_factor || '/';
  DROP TABLE IF EXISTS streets;
  CREATE TABLE streets (
    StreetId integer,
    vmax integer,
    x1 double precision,
    y1 double precision,
    x2 double precision,
    y2 double precision,
    Geom geometry(LineString, 5676) );
  EXECUTE format('COPY streets(StreetId, vmax, x1, y1, x2, y2) FROM ''%sstreets.csv'' 
    DELIMITER '','' CSV HEADER', fullpath);
  UPDATE streets
  SET Geom = ST_Transform(ST_SetSRID(ST_MakeLine(ARRAY[ST_MakePoint(x1, y1),
    ST_MakePoint(x2, y2)]), 4326), 5676);

  DROP TABLE IF EXISTS Points CASCADE;
  CREATE TABLE Points (
    PointId integer,
    PosX double precision,
    PosY double precision,
    Geom geometry(Point, 5676) );
  EXECUTE format('COPY Points(PointId, PosX, PosY) FROM ''%spoints.csv'' 
    DELIMITER  '','' CSV HEADER', fullpath);
  UPDATE Points
  SET Geom = ST_Transform(ST_SetSRID(ST_MakePoint(PosX, PosY), 4326), 5676);

  CREATE INDEX Points_geom_idx ON Points USING gist(Geom);

  CREATE VIEW Points1(PointId, PosX, PosY, Geom) AS
  SELECT PointId, PosX, PosY, Geom
  FROM Points
  LIMIT 10;

  DROP TABLE IF EXISTS RegionsInput CASCADE;
  CREATE TABLE RegionsInput (
    RegionId integer,
    SegNo integer,
    XStart double precision,
    YStart double precision,
    XEnd double precision,
    YEnd double precision );
  EXECUTE format('COPY RegionsInput(RegionId, SegNo, XStart, YStart, XEnd, YEnd) 
    FROM ''%sregions.csv'' DELIMITER  '','' CSV HEADER', fullpath);

  DROP TABLE IF EXISTS Regions CASCADE;
  CREATE TABLE Regions (
    RegionId integer,
    Geom Geometry(Polygon, 5676) );
  INSERT INTO Regions (RegionId, Geom)
  WITH RegionsSegs AS (
    SELECT RegionId, SegNo, ST_Transform(ST_SetSRID(St_MakeLine(
      ST_MakePoint(XStart, YStart), ST_MakePoint(XEnd, YEnd)), 4326), 5676) AS Geom
    FROM RegionsInput )
  SELECT RegionId, ST_Polygon(ST_LineMerge(ST_Union(Geom ORDER BY SegNo)), 5676) AS Geom
  FROM RegionsSegs
  GROUP BY RegionId;

  CREATE INDEX Regions_geom_idx ON Regions USING gist(Geom);

  CREATE VIEW Regions1(RegionId, Geom) AS
  SELECT RegionId, Geom
  FROM Regions
  LIMIT 10;

  DROP TABLE IF EXISTS Instants CASCADE;
  CREATE TABLE Instants (
    InstantId integer,
    Instant timestamptz );
  EXECUTE format('COPY Instants(InstantId, Instant) FROM ''%sinstants.csv''
    DELIMITER  '','' CSV HEADER', fullpath);

  CREATE INDEX Instants_instant_btree_idx ON Instants USING btree(instant);

  CREATE VIEW Instants1(InstantId, Instant) AS
  SELECT InstantId, Instant 
  FROM Instants
  LIMIT 10;

  DROP TABLE IF EXISTS Periods CASCADE;
  CREATE TABLE Periods (
    PeriodId integer,
    BeginP timestamp,
    EndP timestamp,
    Period tstzspan );
  EXECUTE format('COPY Periods(PeriodId, BeginP, EndP) FROM ''%speriods.csv'' 
    DELIMITER  '','' CSV HEADER', fullpath);
  UPDATE Periods
  SET Period = tstzspan(BeginP,EndP);

  CREATE INDEX Periods_Period_gist_idx ON Periods USING gist(Period);

  CREATE VIEW Periods1(PeriodId, BeginP, EndP, Period) AS
  SELECT PeriodId, BeginP, EndP, Period
  FROM Periods
  LIMIT 10;

  DROP TABLE IF EXISTS Vehicles CASCADE;
  CREATE TABLE Vehicles (
    VehicleId integer PRIMARY KEY,
    Licence varchar(32),
    VehicleType varchar(32),
    Model varchar(32) );
  EXECUTE format('COPY Vehicles(VehicleId, Licence, VehicleType, Model) FROM ''%svehicles.csv'' 
    DELIMITER  '','' CSV HEADER', fullpath);

  DROP TABLE IF EXISTS Licences CASCADE;
  CREATE TABLE Licences (
    VehicleId integer PRIMARY KEY,
    LicenceId integer,
    Licence varchar(8) );
  EXECUTE format('COPY Licences(Licence, LicenceId) FROM ''%slicences.csv'' 
    DELIMITER  '','' CSV HEADER', fullpath);
  UPDATE Licences q
  SET VehicleId = ( SELECT v.VehicleId FROM Vehicles v WHERE v.Licence = q.Licence );

  CREATE INDEX Licences_VehId_idx ON Licences USING btree(VehicleId);

  CREATE VIEW Licences1(LicenceId, Licence, VehicleId) AS
  SELECT LicenceId, Licence, VehicleId
  FROM Licences
  LIMIT 10;

  CREATE VIEW Licences2(LicenceId, Licence, VehicleId) AS
  SELECT LicenceId, Licence, VehicleId
  FROM Licences
  LIMIT 10 OFFSET 10;

  DROP TABLE IF EXISTS TripsInput CASCADE;
  CREATE TABLE TripsInput (
    VehicleId integer,
    TripId integer,
    TStart timestamp without time zone,
    TEnd timestamp without time zone,
    XStart double precision,
    YStart double precision,
    XEnd double precision,
    YEnd double precision,
    Geom geometry(LineString) );
  EXECUTE format('COPY TripsInput(VehicleId, TripId, TStart, TEnd, XStart, YStart, XEnd, YEnd) 
    FROM ''%strips.csv'' DELIMITER  '','' CSV HEADER', fullpath);
  UPDATE TripsInput
  SET Geom = ST_Transform(ST_SetSRID(ST_MakeLine(ARRAY[ST_MakePoint(XStart, YStart),
    ST_MakePoint(XEnd, YEnd)]), 4326), 5676);

  DROP TABLE IF EXISTS TripsInputInstants;
  CREATE TABLE TripsInputInstants AS (
  SELECT VehicleId, TripId, TStart, XStart, YStart, 
    ST_Transform(ST_SetSRID(ST_MakePoint(XStart, YStart), 4326), 5676) as Geom
  FROM TripsInput
  UNION ALL
  SELECT t1.VehicleId, t1.TripId, t1.TEnd, t1.XEnd, t1.YEnd, 
    ST_Transform(ST_SetSRID(ST_MakePoint(t1.XEnd, t1.YEnd), 4326), 5676) as Geom
  FROM TripsInput t1 INNER JOIN (
    SELECT VehicleId, TripId, max(TEnd) as MaxTend
    FROM TripsInput 
    GROUP BY VehicleId, TripId
  ) t2 ON t1.VehicleId = t2.VehicleId AND t1.TripId = t2.TripId AND t1.TEnd = t2.MaxTend );
  ALTER TABLE TripsInputInstants ADD COLUMN inst tgeompoint;
  UPDATE TripsInputInstants
  SET inst = tgeompoint_inst(Geom, TStart);

  DROP TABLE IF EXISTS Trips CASCADE;
  CREATE TABLE Trips (
    TripId integer PRIMARY KEY,
    VehicleId integer NOT NULL,
    Trip tgeompoint,
    Traj geometry,
    PRIMARY KEY (VehicleId, TripId),
    FOREIGN KEY (VehicleId) REFERENCES Vehicles(VehicleId));
  INSERT INTO Trips
  SELECT VehicleId, TripId, tgeompoint_seq(array_agg(inst ORDER BY TStart))
  FROM TripsInputInstants
  GROUP BY VehicleId, TripId;
  UPDATE Trips
  SET Traj = trajectory(Trip);

  CREATE INDEX Trips_VehId_idx ON Trips USING btree(VehicleId);
  CREATE INDEX Trips_gist_idx ON Trips USING gist(trip);

  DROP VIEW IF EXISTS Trips1;
  CREATE VIEW Trips1 AS
  SELECT * FROM Trips LIMIT 100;

  -- Drop temporary tables
  DROP TABLE RegionsInput;
  DROP TABLE TripsInput;
  DROP TABLE TripsInputInstants;

  RETURN 'The End';
END;
$$ LANGUAGE 'plpgsql';
			

The script above creates a procedure to load pregenerated BerlinMOD data (in CSV format and WGS84 coordinates) at various scale factors. The procedure has two parameters: the scale factor and the directory where the CSV files are located. It supposes by default that the scale factor is 0.005 and that the CSV files are located in the directory /usr/local/BerlinMOD/<scale factor>/. Notice that the procedure creates GiST indexes for the tables. Alternatively, SP-GiST indexes could be used. The procedure can be called, for example, as follows.

SELECT berlinmod_load('0.05');