BerlinMOD is a standard benchmark for moving object DBMSs. It provides a data generator, pregenerated benchmark data for different scale factors, and set of queries of two types: 17 range-style queries (called BerlinMOD/r), and 9 nearest-neighbours queries (called BerlinMOD/NN). The MobilityDB tutorial presented in Chapter 1, MobilityDB Tutorial and its associated data were based on BerlinMOD. However, its purpose was to show the capabilities of MobilityDB. In this chapter, we show how to load pregenerated BerlinMOD data on MobilityDB and how to express the 17 queries in BerlinMOD/r. Some of these queries were already presented in Chapter 1, MobilityDB Tutorial.
The script for loading pregenerated data is available here.
-------------------------------------------------------------------------------
-- Loads the BerlinMOD data in projected (2D) coordinates with SRID 5676
-- https://epsg.io/5676
-------------------------------------------------------------------------------
DROP FUNCTION IF EXISTS berlinmod_load();
CREATE OR REPLACE FUNCTION berlinmod_load(scale_factor text DEFAULT '0.005',
path text DEFAULT '/usr/local/BerlinMOD/')
RETURNS text AS $$
DECLARE
fullpath text;
BEGIN
fullpath = path || scale_factor || '/';
DROP TABLE IF EXISTS streets;
CREATE TABLE streets (
StreetId integer,
vmax integer,
x1 double precision,
y1 double precision,
x2 double precision,
y2 double precision,
Geom geometry(LineString, 5676) );
EXECUTE format('COPY streets(StreetId, vmax, x1, y1, x2, y2) FROM ''%sstreets.csv''
DELIMITER '','' CSV HEADER', fullpath);
UPDATE streets
SET Geom = ST_Transform(ST_SetSRID(ST_MakeLine(ARRAY[ST_MakePoint(x1, y1),
ST_MakePoint(x2, y2)]), 4326), 5676);
DROP TABLE IF EXISTS Points CASCADE;
CREATE TABLE Points (
PointId integer,
PosX double precision,
PosY double precision,
Geom geometry(Point, 5676) );
EXECUTE format('COPY Points(PointId, PosX, PosY) FROM ''%spoints.csv''
DELIMITER '','' CSV HEADER', fullpath);
UPDATE Points
SET Geom = ST_Transform(ST_SetSRID(ST_MakePoint(PosX, PosY), 4326), 5676);
CREATE INDEX Points_geom_idx ON Points USING gist(Geom);
CREATE VIEW Points1(PointId, PosX, PosY, Geom) AS
SELECT PointId, PosX, PosY, Geom
FROM Points
LIMIT 10;
DROP TABLE IF EXISTS RegionsInput CASCADE;
CREATE TABLE RegionsInput (
RegionId integer,
SegNo integer,
XStart double precision,
YStart double precision,
XEnd double precision,
YEnd double precision );
EXECUTE format('COPY RegionsInput(RegionId, SegNo, XStart, YStart, XEnd, YEnd)
FROM ''%sregions.csv'' DELIMITER '','' CSV HEADER', fullpath);
DROP TABLE IF EXISTS Regions CASCADE;
CREATE TABLE Regions (
RegionId integer,
Geom Geometry(Polygon, 5676) );
INSERT INTO Regions (RegionId, Geom)
WITH RegionsSegs AS (
SELECT RegionId, SegNo, ST_Transform(ST_SetSRID(St_MakeLine(
ST_MakePoint(XStart, YStart), ST_MakePoint(XEnd, YEnd)), 4326), 5676) AS Geom
FROM RegionsInput )
SELECT RegionId, ST_Polygon(ST_LineMerge(ST_Union(Geom ORDER BY SegNo)), 5676) AS Geom
FROM RegionsSegs
GROUP BY RegionId;
CREATE INDEX Regions_geom_idx ON Regions USING gist(Geom);
CREATE VIEW Regions1(RegionId, Geom) AS
SELECT RegionId, Geom
FROM Regions
LIMIT 10;
DROP TABLE IF EXISTS Instants CASCADE;
CREATE TABLE Instants (
InstantId integer,
Instant timestamptz );
EXECUTE format('COPY Instants(InstantId, Instant) FROM ''%sinstants.csv''
DELIMITER '','' CSV HEADER', fullpath);
CREATE INDEX Instants_instant_btree_idx ON Instants USING btree(instant);
CREATE VIEW Instants1(InstantId, Instant) AS
SELECT InstantId, Instant
FROM Instants
LIMIT 10;
DROP TABLE IF EXISTS Periods CASCADE;
CREATE TABLE Periods (
PeriodId integer,
BeginP timestamp,
EndP timestamp,
Period tstzspan );
EXECUTE format('COPY Periods(PeriodId, BeginP, EndP) FROM ''%speriods.csv''
DELIMITER '','' CSV HEADER', fullpath);
UPDATE Periods
SET Period = tstzspan(BeginP,EndP);
CREATE INDEX Periods_Period_gist_idx ON Periods USING gist(Period);
CREATE VIEW Periods1(PeriodId, BeginP, EndP, Period) AS
SELECT PeriodId, BeginP, EndP, Period
FROM Periods
LIMIT 10;
DROP TABLE IF EXISTS Vehicles CASCADE;
CREATE TABLE Vehicles (
VehicleId integer PRIMARY KEY,
Licence varchar(32),
VehicleType varchar(32),
Model varchar(32) );
EXECUTE format('COPY Vehicles(VehicleId, Licence, VehicleType, Model) FROM ''%svehicles.csv''
DELIMITER '','' CSV HEADER', fullpath);
DROP TABLE IF EXISTS Licences CASCADE;
CREATE TABLE Licences (
VehicleId integer PRIMARY KEY,
LicenceId integer,
Licence varchar(8) );
EXECUTE format('COPY Licences(Licence, LicenceId) FROM ''%slicences.csv''
DELIMITER '','' CSV HEADER', fullpath);
UPDATE Licences q
SET VehicleId = ( SELECT v.VehicleId FROM Vehicles v WHERE v.Licence = q.Licence );
CREATE INDEX Licences_VehId_idx ON Licences USING btree(VehicleId);
CREATE VIEW Licences1(LicenceId, Licence, VehicleId) AS
SELECT LicenceId, Licence, VehicleId
FROM Licences
LIMIT 10;
CREATE VIEW Licences2(LicenceId, Licence, VehicleId) AS
SELECT LicenceId, Licence, VehicleId
FROM Licences
LIMIT 10 OFFSET 10;
DROP TABLE IF EXISTS TripsInput CASCADE;
CREATE TABLE TripsInput (
VehicleId integer,
TripId integer,
TStart timestamp without time zone,
TEnd timestamp without time zone,
XStart double precision,
YStart double precision,
XEnd double precision,
YEnd double precision,
Geom geometry(LineString) );
EXECUTE format('COPY TripsInput(VehicleId, TripId, TStart, TEnd, XStart, YStart, XEnd, YEnd)
FROM ''%strips.csv'' DELIMITER '','' CSV HEADER', fullpath);
UPDATE TripsInput
SET Geom = ST_Transform(ST_SetSRID(ST_MakeLine(ARRAY[ST_MakePoint(XStart, YStart),
ST_MakePoint(XEnd, YEnd)]), 4326), 5676);
DROP TABLE IF EXISTS TripsInputInstants;
CREATE TABLE TripsInputInstants AS (
SELECT VehicleId, TripId, TStart, XStart, YStart,
ST_Transform(ST_SetSRID(ST_MakePoint(XStart, YStart), 4326), 5676) as Geom
FROM TripsInput
UNION ALL
SELECT t1.VehicleId, t1.TripId, t1.TEnd, t1.XEnd, t1.YEnd,
ST_Transform(ST_SetSRID(ST_MakePoint(t1.XEnd, t1.YEnd), 4326), 5676) as Geom
FROM TripsInput t1 INNER JOIN (
SELECT VehicleId, TripId, max(TEnd) as MaxTend
FROM TripsInput
GROUP BY VehicleId, TripId
) t2 ON t1.VehicleId = t2.VehicleId AND t1.TripId = t2.TripId AND t1.TEnd = t2.MaxTend );
ALTER TABLE TripsInputInstants ADD COLUMN inst tgeompoint;
UPDATE TripsInputInstants
SET inst = tgeompoint_inst(Geom, TStart);
DROP TABLE IF EXISTS Trips CASCADE;
CREATE TABLE Trips (
TripId integer PRIMARY KEY,
VehicleId integer NOT NULL,
Trip tgeompoint,
Traj geometry,
PRIMARY KEY (VehicleId, TripId),
FOREIGN KEY (VehicleId) REFERENCES Vehicles(VehicleId));
INSERT INTO Trips
SELECT VehicleId, TripId, tgeompoint_seq(array_agg(inst ORDER BY TStart))
FROM TripsInputInstants
GROUP BY VehicleId, TripId;
UPDATE Trips
SET Traj = trajectory(Trip);
CREATE INDEX Trips_VehId_idx ON Trips USING btree(VehicleId);
CREATE INDEX Trips_gist_idx ON Trips USING gist(trip);
DROP VIEW IF EXISTS Trips1;
CREATE VIEW Trips1 AS
SELECT * FROM Trips LIMIT 100;
-- Drop temporary tables
DROP TABLE RegionsInput;
DROP TABLE TripsInput;
DROP TABLE TripsInputInstants;
RETURN 'The End';
END;
$$ LANGUAGE 'plpgsql';
The script above creates a procedure to load pregenerated BerlinMOD data (in CSV format and WGS84 coordinates) at various scale factors. The procedure has two parameters: the scale factor and the directory where the CSV files are located. It supposes by default that the scale factor is 0.005 and that the CSV files are located in the directory /usr/local/BerlinMOD/<scale factor>/. Notice that the procedure creates GiST indexes for the tables. Alternatively, SP-GiST indexes could be used. The procedure can be called, for example, as follows.
SELECT berlinmod_load('0.05');