The GeoNames geographical database covers all countries and contains over eleven million placenames that are available for download free of charge.
Switch to the hxeadm user, then change to the $DIR_INSTANCE/work directory.
hxehost:~> sudo su - hxeadm
hxeadm@hxehost:/usr/sap/HXE/HDB90> cd $DIR_INSTANCE/work
hxeadm@hxehost:/usr/sap/HXE/HDB90/work> mkdir geonames
hxeadm@hxehost:/usr/sap/HXE/HDB90/work> cd geonames
Download the files into /usr/sap/HXE/HDB90/work/geonames/ (or whatever your directory is) and unpack them if necessary. I am going to download data for Poland, so the file is PL.zip, but you should choose a file for your own country from http://download.geonames.org/export/dump/
hxeadm@hxehost:/usr/sap/HXE/HDB90/work/geonames> wget http://download.geonames.org/export/dump/PL.zip
wget http://download.geonames.org/export/dump/admin2Codes.txt
wget http://download.geonames.org/export/dump/admin1CodesASCII.txt
wget http://download.geonames.org/export/dump/featureCodes_en.txt
wget http://download.geonames.org/export/dump/timeZones.txt
wget http://download.geonames.org/export/dump/countryInfo.txt
wget http://download.geonames.org/export/dump/iso-languagecodes.txt
unzip PL.zip
hxeadm@hxehost:/usr/sap/HXE/HDB90/work/geonames> ls -l
total 11432
-rw-r----- 1 hxeadm sapsys 7164293 Feb 28 02:32 PL.txt
-rw-r----- 1 hxeadm sapsys 1912525 Feb 28 01:32 PL.zip
-rw-r----- 1 hxeadm sapsys 137157 Feb 28 01:34 admin1CodesASCII.txt
-rw-r----- 1 hxeadm sapsys 2227362 Feb 28 01:34 admin2Codes.txt
-rw-r----- 1 hxeadm sapsys 31414 Feb 28 01:25 countryInfo.txt
-rw-r----- 1 hxeadm sapsys 58139 Feb 28 01:25 featureCodes_en.txt
-rw-r----- 1 hxeadm sapsys 131720 Feb 28 01:25 iso-languagecodes.txt
-rw-r----- 1 hxeadm sapsys 8323 Feb 28 02:32 readme.txt
-rw-r----- 1 hxeadm sapsys 14115 Feb 28 01:37 timeZones.txt
I skipped downloading alternateNames.zip because alternative names are available as a field in the main PL.txt file. Now create the schema:
create schema "GEONAME";
set schema "GEONAME";
--DROP TABLE "GEONAME"."ISO_LANGUAGECODES";
-- ISO 639 language codes: 3-letter, 2-letter variants and the language name.
CREATE COLUMN TABLE "GEONAME"."ISO_LANGUAGECODES" ("ISO_639_3" CHAR(3),
"ISO_639_2" VARCHAR(11),
"ISO_639_1" VARCHAR(2),
"LANGUAGE_NAME" VARCHAR(60));
-- Tab-delimited load; the first row is a header, rejected rows go to the error log.
IMPORT FROM CSV FILE '/usr/sap/HXE/HDB90/work/geonames/iso-languagecodes.txt' INTO "GEONAME"."ISO_LANGUAGECODES"
WITH FIELD DELIMITED BY '\t'
ERROR LOG 'iso-languagecodes.err'
SKIP FIRST 1 ROW
THREADS 8;
--DROP TABLE "GEONAME"."COUNTRYINFO";
-- Country metadata from countryInfo.txt, keyed by ISO 3166 alpha-2 code.
CREATE COLUMN TABLE "GEONAME"."COUNTRYINFO" ("ISO_ALPHA2" CHAR(2),
"ISO_ALPHA3" CHAR(3),
"ISO_NUMERIC" INTEGER,
"FIPS_CODE" VARCHAR(3),
"NAME" NVARCHAR(200),
"CAPITAL" NVARCHAR(200),
"AREAINSQKM" DOUBLE,
"POPULATION" BIGINT, -- BIGINT: INTEGER caps at ~2.1e9, and GEONAMES.POPULATION is BIGINT too
"CONTINENT" VARCHAR(2),
"TLD" VARCHAR(10),
"CURRENCYCODE" VARCHAR(3),
"CURRENCYNAME" VARCHAR(20),
"PHONE" VARCHAR(20),
"POSTALCODE" VARCHAR(100),
"POSTALCODEREGEX" VARCHAR(200),
"LANGUAGES" VARCHAR(200),
"GEONAMEID" INTEGER,
"NEIGHBORS" VARCHAR(50),
"EQUIVFIPSCODE" VARCHAR(3),
PRIMARY KEY ("ISO_ALPHA2"));
-- countryInfo.txt starts with a 51-line metadata description, hence SKIP FIRST 51 ROW.
-- NOTE(review): that header length may change between GeoNames releases — verify on download.
IMPORT FROM CSV FILE '/usr/sap/HXE/HDB90/work/geonames/countryInfo.txt' INTO "GEONAME"."COUNTRYINFO"
WITH FIELD DELIMITED BY '\t'
ERROR LOG 'countryInfo.err'
SKIP FIRST 51 ROW
THREADS 8;
--DROP TABLE "GEONAME"."ADMIN1CODES";
-- First-level administrative divisions with both native and ASCII names.
CREATE COLUMN TABLE "GEONAME"."ADMIN1CODES" ("CODE" VARCHAR(30),
"NAME" NVARCHAR(100),
"NAMEASCII" VARCHAR(100),
"GEONAMEID" INTEGER);
-- Tab-delimited load; unlike iso-languagecodes.txt, no header row is skipped here.
IMPORT FROM CSV FILE '/usr/sap/HXE/HDB90/work/geonames/admin1CodesASCII.txt' INTO "GEONAME"."ADMIN1CODES"
WITH FIELD DELIMITED BY '\t'
ERROR LOG 'admin1CodesASCII.err'
THREADS 8;
--DROP TABLE "GEONAME"."ADMIN2CODES";
-- Second-level administrative divisions; same shape as ADMIN1CODES.
CREATE COLUMN TABLE "GEONAME"."ADMIN2CODES" (
    "CODE"      VARCHAR(30),
    "NAME"      NVARCHAR(100),
    "NAMEASCII" VARCHAR(100),
    "GEONAMEID" INTEGER
);
-- Tab-delimited load, no header row; rejects are written to the error log.
IMPORT FROM CSV FILE '/usr/sap/HXE/HDB90/work/geonames/admin2Codes.txt'
    INTO "GEONAME"."ADMIN2CODES"
    WITH FIELD DELIMITED BY '\t'
    ERROR LOG 'admin2Codes.err'
    THREADS 8;
--DROP TABLE "GEONAME"."FEATURECODES";
-- GeoNames feature codes (English); CODE is 'class.code' — the queries below
-- join it against FEATURECLASS || '.' || FEATURECODE of the GEONAMES table.
CREATE COLUMN TABLE "GEONAME"."FEATURECODES" ("CODE" VARCHAR(7),
"NAME" VARCHAR(60),
"DESCRIPTION" VARCHAR(256));
-- Tab-delimited load, no header row skipped.
IMPORT FROM CSV FILE '/usr/sap/HXE/HDB90/work/geonames/featureCodes_en.txt' INTO "GEONAME"."FEATURECODES"
WITH FIELD DELIMITED BY '\t'
ERROR LOG 'featureCodes_en.err'
THREADS 8;
--DROP TABLE "GEONAME"."TIMEZONES";
-- Time zones per country. Offsets use DECIMAL(4,2): several real zones have
-- quarter-hour offsets (e.g. +5.75 Kathmandu, +8.75 Eucla, +12.75 Chatham)
-- that DECIMAL(3,1) cannot store without silent rounding or an import error.
CREATE COLUMN TABLE "GEONAME"."TIMEZONES" ("COUNTRYCODE" CHAR(2),
"TIMEZONEID" VARCHAR(200),
"GMT_OFFSET" DECIMAL(4,2),
"DST_OFFSET" DECIMAL(4,2),
"RAW_OFFSET" DECIMAL(4,2));
-- Tab-delimited load; the first row is a header.
IMPORT FROM CSV FILE '/usr/sap/HXE/HDB90/work/geonames/timeZones.txt' INTO "GEONAME"."TIMEZONES"
WITH FIELD DELIMITED BY '\t'
ERROR LOG 'timeZones.err'
SKIP FIRST 1 ROW
THREADS 8;
--DROP TABLE "GEONAME"."CONTINENTCODES";
-- Static lookup of the seven continents with their GeoNames ids.
CREATE COLUMN TABLE "GEONAME"."CONTINENTCODES" (
    "CODE"      CHAR(2),
    "NAME"      VARCHAR(20),
    "GEONAMEID" INT,
    PRIMARY KEY ("CODE")
);
-- Seed the lookup in a single statement (same seven rows as individual INSERTs).
INSERT INTO "GEONAME"."CONTINENTCODES" ("CODE", "NAME", "GEONAMEID")
SELECT 'AF', 'Africa',        6255146 FROM DUMMY UNION ALL
SELECT 'AS', 'Asia',          6255147 FROM DUMMY UNION ALL
SELECT 'EU', 'Europe',        6255148 FROM DUMMY UNION ALL
SELECT 'NA', 'North America', 6255149 FROM DUMMY UNION ALL
SELECT 'OC', 'Oceania',       6255151 FROM DUMMY UNION ALL
SELECT 'SA', 'South America', 6255150 FROM DUMMY UNION ALL
SELECT 'AN', 'Antarctica',    6255152 FROM DUMMY;
-- Notes on the loads above: every file uses \t as the field separator, and
-- countryInfo.txt needs SKIP FIRST 51 ROW because for some reason the file
-- begins with a 51-line description of its own metadata.
-- The GEONAMES table below carries two geometry columns, LOC_4326 and
-- LOC_3857, storing each location in two Spatial Reference Systems:
--   4326 — 3D Round Earth model used by GPS, degrees as unit of measurement;
--          gives more precise spatial calculations of measures.
--   3857 — 2D Pseudo-Mercator planar projection used by web maps, meters as
--          unit; more suitable for spatial predicates and clustering.
-- Installed SRSes are listed in "PUBLIC"."ST_SPATIAL_REFERENCE_SYSTEMS";
-- 3857 is not included by default, so register it before creating the table.
CREATE SPATIAL REFERENCE SYSTEM "WGS 84 / Pseudo-Mercator"
IDENTIFIED BY 3857
DEFINITION 'PROJCS["WGS 84 / Pseudo-Mercator",GEOGCS["WGS 84",DATUM["WGS_1984",SPHEROID["WGS 84",6378137,298.257223563,AUTHORITY["EPSG","7030"]],AUTHORITY["EPSG","6326"]],PRIMEM["Greenwich",0,AUTHORITY["EPSG","8901"]],UNIT["degree",0.0174532925199433,AUTHORITY["EPSG","9122"]],AUTHORITY["EPSG","4326"]],PROJECTION["Mercator_1SP"],PARAMETER["central_meridian",0],PARAMETER["scale_factor",1],PARAMETER["false_easting",0],PARAMETER["false_northing",0],UNIT["metre",1,AUTHORITY["EPSG","9001"]],AXIS["X",EAST],AXIS["Y",NORTH],EXTENSION["PROJ4","+proj=merc +a=6378137 +b=6378137 +lat_ts=0.0 +lon_0=0.0 +x_0=0.0 +y_0=0 +k=1.0 +units=m +nadgrids=@null +wktext +no_defs"],AUTHORITY["EPSG","3857"]]'
ORGANIZATION "EPSG" IDENTIFIED BY 3857
TRANSFORM DEFINITION '+proj=merc +a=6378137 +b=6378137 +lat_ts=0.0 +lon_0=0.0 +x_0=0.0 +y_0=0 +k=1.0 +units=m +nadgrids=@null +wktext +no_defs'
TYPE PLANAR
COORDINATE X BETWEEN -20037508.342789248 AND 20037508.342789248
COORDINATE Y BETWEEN -20048966.104014635 AND 20048966.104014624
TOLERANCE DEFAULT
SNAP TO GRID DEFAULT
POLYGON FORMAT 'EvenOdd'
STORAGE FORMAT 'Internal';
--drop table "GEONAME"."GEONAMES";
-- Main GeoNames table (one row per place). LOC_4326 and LOC_3857 hold the
-- same point in the two SRSes registered above.
create column table "GEONAME"."GEONAMES"(
"GEONAMESID" INTEGER not null,
"NAME" NVARCHAR (200) null,
"ASCIINAME" VARCHAR (200) null,
"ALTERNATENAMES" CLOB null,
"LATITUDE" DECIMAL null,
"LONGITUDE" DECIMAL null,
"LOC_4326" ST_POINT(4326) null,
"LOC_3857" ST_POINT(3857) null,
"FEATURECLASS" VARCHAR (1) null,
"FEATURECODE" VARCHAR (10) null,
"COUNTRYCODE" VARCHAR (2) null,
"CC2" VARCHAR (200) null,
"ADMIN1CODE" VARCHAR (20) null,
"ADMIN2CODE" VARCHAR (80) null,
"ADMIN3CODE" VARCHAR (20) null,
"ADMIN4CODE" VARCHAR (20) null,
"POPULATION" BIGINT null,
"ELEVATION" INTEGER null,
"DEM" INTEGER null,
"TIMEZONE" VARCHAR (40) null,
"MODIFICATIONDATE" DATE null,
primary key ("GEONAMESID") );
-- Load the PL (or your country's) file. COLUMN LIST deliberately omits
-- LOC_4326 and LOC_3857 — they are not in the file and are derived below.
-- Some NAME values start with " which HANA would treat as an enclosure
-- character used to delimit field data; OPTIONALLY ENCLOSED BY '' avoids
-- that. Check country.err afterwards — hopefully everything went smooth!
IMPORT FROM CSV FILE '/usr/sap/HXE/HDB90/work/geonames/PL.txt'
INTO "GEONAME"."GEONAMES"
WITH FIELD DELIMITED BY '\t'
DATE FORMAT 'YYYY-MM-DD'
ERROR LOG 'country.err'
NO TYPE CHECK
THREADS 8
OPTIONALLY ENCLOSED BY ''
COLUMN LIST ("GEONAMESID", "NAME", "ASCIINAME", "ALTERNATENAMES", "LATITUDE", "LONGITUDE", "FEATURECLASS",
"FEATURECODE", "COUNTRYCODE", "CC2", "ADMIN1CODE", "ADMIN2CODE", "ADMIN3CODE", "ADMIN4CODE", "POPULATION",
"ELEVATION", "DEM", "TIMEZONE", "MODIFICATIONDATE");
-- Build the geometry columns from LONGITUDE/LATITUDE via ST_GeomFromText();
-- the 3857 point is derived by transforming the 4326 one.
UPDATE "GEONAME"."GEONAMES"
SET "LOC_4326" = ST_GeomFromText('POINT(' || "LONGITUDE" || ' ' || "LATITUDE" || ')', 4326),
"LOC_3857" = ST_GeomFromText('POINT(' || "LONGITUDE" || ' ' || "LATITUDE" || ')', 4326).ST_Transform(3857);
-- Sanity check: all abandoned populated places aggregated into one GeoJSON.
select ST_UnionAggr(loc_4326).ST_asGeoJSON() as "PLACES"
from "GEONAME"."GEONAMES" n
join "GEONAME"."FEATURECODES" f
on f."CODE" = n."FEATURECLASS" || '.' || n."FEATURECODE"
where f."NAME" = 'abandoned populated place';
-- Planar 3857 is used for clustering, while Round-Earth 4326 is used for the
-- precise distance calculation. Joining with "GEONAME"."FEATURECODES" is
-- skipped here to keep the SQL clear: PPLQ are abandoned locations and PPLA
-- is an inhabited location.
select
ST_UnionAggr("cluster").ST_Transform(4326).ST_asGeoJSON()
from ( select
"cluster_id",
-- alpha shape (7100 m radius) around each DBSCAN cluster of abandoned places
ST_UnionAggr("LOC_3857").ST_AlphaShape(7100) as "cluster"
from ( select
ST_ClusterID() OVER (CLUSTER BY "LOC_3857" USING DBSCAN EPS 7000 MINPTS 3) AS "cluster_id",
"NAME",
"LOC_3857"
from "GEONAME"."GEONAMES"
where FEATURECODE in ('PPLQ') )
where "cluster_id" <> 0 -- cluster 0 collects DBSCAN noise points
group by "cluster_id" ) a
join "GEONAME"."GEONAMES" n
on a."cluster".ST_Centroid().ST_Transform(4326).ST_WithinDistance(n."LOC_4326", 200000) = 1
where n."NAME" = 'Wrocław' -- was mojibake 'WrocÅaw' (UTF-8 read as Latin-1)
and n."FEATURECODE" = 'PPLA';
-- Note: EPS 7000 is meters in SRS 3857 while the 200 km distance is measured
-- using SRS 4326 — and you will be absolutely right to notice! The point is
-- to show mixing and transforming different SRSes in one query.
You must be a registered user to add a comment. If you've already registered, sign in. Otherwise, register and sign in.
| User | Count |
|---|---|
| 48 | |
| 23 | |
| 20 | |
| 18 | |
| 16 | |
| 13 | |
| 13 | |
| 13 | |
| 12 | |
| 11 |