BerlinMOD is a standard benchmark for moving object DBMSs. It provides a data generator, pregenerated benchmark data for different scale factors, and set of queries of two types: 17 range-style queries (called BerlinMOD/r), and 9 nearest-neighbours queries (called BerlinMOD/NN). The MobilityDB tutorial presented in Chapter 1, MobilityDB Tutorial and its associated data were based on BerlinMOD. However, its purpose was to show the capabilities of MobilityDB. In this chapter, we show how to load pregenerated BerlinMOD data on MobilityDB and how to express the 17 queries in BerlinMOD/r. Some of these queries were already presented in Chapter 1, MobilityDB Tutorial.
The script for loading pregenerated data is available here.
------------------------------------------------------------------------------- -- Loads the BerlinMOD data in projected (2D) coordinates with SRID 5676 -- https://epsg.io/5676 ------------------------------------------------------------------------------- DROP FUNCTION IF EXISTS berlinmod_load(); CREATE OR REPLACE FUNCTION berlinmod_load(scale_factor text DEFAULT '0.005', path text DEFAULT '/usr/local/BerlinMOD/') RETURNS text AS $$ DECLARE fullpath text; BEGIN fullpath = path || scale_factor || '/'; DROP TABLE IF EXISTS streets; CREATE TABLE streets ( StreetId integer, vmax integer, x1 double precision, y1 double precision, x2 double precision, y2 double precision, Geom geometry(LineString, 5676) ); EXECUTE format('COPY streets(StreetId, vmax, x1, y1, x2, y2) FROM ''%sstreets.csv'' DELIMITER '','' CSV HEADER', fullpath); UPDATE streets SET Geom = ST_Transform(ST_SetSRID(ST_MakeLine(ARRAY[ST_MakePoint(x1, y1), ST_MakePoint(x2, y2)]), 4326), 5676); DROP TABLE IF EXISTS Points CASCADE; CREATE TABLE Points ( PointId integer, PosX double precision, PosY double precision, Geom geometry(Point, 5676) ); EXECUTE format('COPY Points(PointId, PosX, PosY) FROM ''%spoints.csv'' DELIMITER '','' CSV HEADER', fullpath); UPDATE Points SET Geom = ST_Transform(ST_SetSRID(ST_MakePoint(PosX, PosY), 4326), 5676); CREATE INDEX Points_geom_idx ON Points USING gist(Geom); CREATE VIEW Points1(PointId, PosX, PosY, Geom) AS SELECT PointId, PosX, PosY, Geom FROM Points LIMIT 10; DROP TABLE IF EXISTS RegionsInput CASCADE; CREATE TABLE RegionsInput ( RegionId integer, SegNo integer, XStart double precision, YStart double precision, XEnd double precision, YEnd double precision ); EXECUTE format('COPY RegionsInput(RegionId, SegNo, XStart, YStart, XEnd, YEnd) FROM ''%sregions.csv'' DELIMITER '','' CSV HEADER', fullpath); DROP TABLE IF EXISTS Regions CASCADE; CREATE TABLE Regions ( RegionId integer, Geom Geometry(Polygon, 5676) ); INSERT INTO Regions (RegionId, Geom) WITH RegionsSegs AS ( SELECT RegionId, SegNo, ST_Transform(ST_SetSRID(St_MakeLine( ST_MakePoint(XStart, YStart), ST_MakePoint(XEnd, YEnd)), 4326), 5676) AS Geom FROM RegionsInput ) SELECT RegionId, ST_Polygon(ST_LineMerge(ST_Union(Geom ORDER BY SegNo)), 5676) AS Geom FROM RegionsSegs GROUP BY RegionId; CREATE INDEX Regions_geom_idx ON Regions USING gist(Geom); CREATE VIEW Regions1(RegionId, Geom) AS SELECT RegionId, Geom FROM Regions LIMIT 10; DROP TABLE IF EXISTS Instants CASCADE; CREATE TABLE Instants ( InstantId integer, Instant timestamptz ); EXECUTE format('COPY Instants(InstantId, Instant) FROM ''%sinstants.csv'' DELIMITER '','' CSV HEADER', fullpath); CREATE INDEX Instants_instant_btree_idx ON Instants USING btree(instant); CREATE VIEW Instants1(InstantId, Instant) AS SELECT InstantId, Instant FROM Instants LIMIT 10; DROP TABLE IF EXISTS Periods CASCADE; CREATE TABLE Periods ( PeriodId integer, BeginP timestamp, EndP timestamp, Period tstzspan ); EXECUTE format('COPY Periods(PeriodId, BeginP, EndP) FROM ''%speriods.csv'' DELIMITER '','' CSV HEADER', fullpath); UPDATE Periods SET Period = tstzspan(BeginP,EndP); CREATE INDEX Periods_Period_gist_idx ON Periods USING gist(Period); CREATE VIEW Periods1(PeriodId, BeginP, EndP, Period) AS SELECT PeriodId, BeginP, EndP, Period FROM Periods LIMIT 10; DROP TABLE IF EXISTS Vehicles CASCADE; CREATE TABLE Vehicles ( VehicleId integer PRIMARY KEY, Licence varchar(32), VehicleType varchar(32), Model varchar(32) ); EXECUTE format('COPY Vehicles(VehicleId, Licence, VehicleType, Model) FROM ''%svehicles.csv'' DELIMITER '','' CSV HEADER', fullpath); DROP TABLE IF EXISTS Licences CASCADE; CREATE TABLE Licences ( VehicleId integer PRIMARY KEY, LicenceId integer, Licence varchar(8) ); EXECUTE format('COPY Licences(Licence, LicenceId) FROM ''%slicences.csv'' DELIMITER '','' CSV HEADER', fullpath); UPDATE Licences q SET VehicleId = ( SELECT v.VehicleId FROM Vehicles v WHERE v.Licence = q.Licence ); CREATE INDEX Licences_VehId_idx ON Licences USING btree(VehicleId); CREATE VIEW Licences1(LicenceId, Licence, VehicleId) AS SELECT LicenceId, Licence, VehicleId FROM Licences LIMIT 10; CREATE VIEW Licences2(LicenceId, Licence, VehicleId) AS SELECT LicenceId, Licence, VehicleId FROM Licences LIMIT 10 OFFSET 10; DROP TABLE IF EXISTS TripsInput CASCADE; CREATE TABLE TripsInput ( VehicleId integer, TripId integer, TStart timestamp without time zone, TEnd timestamp without time zone, XStart double precision, YStart double precision, XEnd double precision, YEnd double precision, Geom geometry(LineString) ); EXECUTE format('COPY TripsInput(VehicleId, TripId, TStart, TEnd, XStart, YStart, XEnd, YEnd) FROM ''%strips.csv'' DELIMITER '','' CSV HEADER', fullpath); UPDATE TripsInput SET Geom = ST_Transform(ST_SetSRID(ST_MakeLine(ARRAY[ST_MakePoint(XStart, YStart), ST_MakePoint(XEnd, YEnd)]), 4326), 5676); DROP TABLE IF EXISTS TripsInputInstants; CREATE TABLE TripsInputInstants AS ( SELECT VehicleId, TripId, TStart, XStart, YStart, ST_Transform(ST_SetSRID(ST_MakePoint(XStart, YStart), 4326), 5676) as Geom FROM TripsInput UNION ALL SELECT t1.VehicleId, t1.TripId, t1.TEnd, t1.XEnd, t1.YEnd, ST_Transform(ST_SetSRID(ST_MakePoint(t1.XEnd, t1.YEnd), 4326), 5676) as Geom FROM TripsInput t1 INNER JOIN ( SELECT VehicleId, TripId, max(TEnd) as MaxTend FROM TripsInput GROUP BY VehicleId, TripId ) t2 ON t1.VehicleId = t2.VehicleId AND t1.TripId = t2.TripId AND t1.TEnd = t2.MaxTend ); ALTER TABLE TripsInputInstants ADD COLUMN inst tgeompoint; UPDATE TripsInputInstants SET inst = tgeompoint_inst(Geom, TStart); DROP TABLE IF EXISTS Trips CASCADE; CREATE TABLE Trips ( TripId integer PRIMARY KEY, VehicleId integer NOT NULL, Trip tgeompoint, Traj geometry, PRIMARY KEY (VehicleId, TripId), FOREIGN KEY (VehicleId) REFERENCES Vehicles(VehicleId)); INSERT INTO Trips SELECT VehicleId, TripId, tgeompoint_seq(array_agg(inst ORDER BY TStart)) FROM TripsInputInstants GROUP BY VehicleId, TripId; UPDATE Trips SET Traj = trajectory(Trip); CREATE INDEX Trips_VehId_idx ON Trips USING btree(VehicleId); CREATE INDEX Trips_gist_idx ON Trips USING gist(trip); DROP VIEW IF EXISTS Trips1; CREATE VIEW Trips1 AS SELECT * FROM Trips LIMIT 100; -- Drop temporary tables DROP TABLE RegionsInput; DROP TABLE TripsInput; DROP TABLE TripsInputInstants; RETURN 'The End'; END; $$ LANGUAGE 'plpgsql';
The script above creates a procedure to load pregenerated BerlinMOD data (in CSV format and WGS84 coordinates) at various scale factors. The procedure has two parameters: the scale factor and the directory where the CSV files are located. It supposes by default that the scale factor is 0.005
and that the CSV files are located in the directory /usr/local/BerlinMOD/<scale factor>/
. Notice that the procedure creates GiST indexes for the tables. Alternatively, SP-GiST indexes could be used. The procedure can be called, for example, as follows.
SELECT berlinmod_load('0.05');