Strabon
changeset 308:7f1078abc31f
Added the Geonames parser
author | Kostis Kyzirakos <kkyzir@di.uoa.gr> |
---|---|
date | Wed Jun 20 19:03:13 2012 +0300 (2012-06-20) |
parents | c91ab390b86d |
children | d53258ac8888 |
files | pom.xml runtime/pom.xml runtime/src/main/java/eu/earthobservatory/utils/GeonamesParser.java |
line diff
1.1 --- a/pom.xml Wed Jun 20 16:48:18 2012 +0300 1.2 +++ b/pom.xml Wed Jun 20 19:03:13 2012 +0300 1.3 @@ -45,7 +45,7 @@ 1.4 1.5 <dependencyManagement> 1.6 <dependencies> 1.7 - <!-- Strabon dependencies --> 1.8 + <!-- Strabon dependencies --> 1.9 <dependency> 1.10 <groupId>org.openrdf.sesame</groupId> 1.11 <artifactId>sesame-queryalgebra-evaluation-spatial</artifactId> 1.12 @@ -91,6 +91,12 @@ 1.13 <!-- Sesame dependencies --> 1.14 <dependency> 1.15 <groupId>org.openrdf.sesame</groupId> 1.16 + <artifactId>sesame-sail-memory</artifactId> 1.17 + <version>${sesame.version}</version> 1.18 + </dependency> 1.19 + 1.20 + <dependency> 1.21 + <groupId>org.openrdf.sesame</groupId> 1.22 <artifactId>sesame-repository-sail</artifactId> 1.23 <version>${sesame.version}</version> 1.24 </dependency> 1.25 @@ -290,6 +296,36 @@ 1.26 <version>${geotools.version}</version> 1.27 </dependency> 1.28 1.29 + <dependency> 1.30 + <groupId>org.geotools</groupId> 1.31 + <artifactId>gt-epsg-wkt</artifactId> 1.32 + <version>${geotools.version}</version> 1.33 + </dependency> 1.34 + 1.35 + <dependency> 1.36 + <groupId>org.geotools</groupId> 1.37 + <artifactId>gt-geometry</artifactId> 1.38 + <version>${geotools.version}</version> 1.39 + </dependency> 1.40 + 1.41 + <dependency> 1.42 + <groupId>org.geotools</groupId> 1.43 + <artifactId>gt-data</artifactId> 1.44 + <version>${geotools.version}</version> 1.45 + </dependency> 1.46 + 1.47 + <dependency> 1.48 + <groupId>org.geotools</groupId> 1.49 + <artifactId>gt-swing</artifactId> 1.50 + <version>${geotools.version}</version> 1.51 + </dependency> 1.52 + 1.53 + <dependency> 1.54 + <groupId>org.opengis</groupId> 1.55 + <artifactId>geoapi-pending</artifactId> 1.56 + <version>2.3-M1</version> 1.57 + </dependency> 1.58 + 1.59 <!-- JTS --> 1.60 <dependency> 1.61 <groupId>com.vividsolutions</groupId>
2.1 --- a/runtime/pom.xml Wed Jun 20 16:48:18 2012 +0300 2.2 +++ b/runtime/pom.xml Wed Jun 20 19:03:13 2012 +0300 2.3 @@ -17,6 +17,11 @@ 2.4 <dependencies> 2.5 <dependency> 2.6 <groupId>org.openrdf.sesame</groupId> 2.7 + <artifactId>sesame-sail-memory</artifactId> 2.8 + </dependency> 2.9 + 2.10 + <dependency> 2.11 + <groupId>org.openrdf.sesame</groupId> 2.12 <artifactId>sesame-queryresultio-sparqlxml-spatial</artifactId> 2.13 </dependency> 2.14 2.15 @@ -273,11 +278,34 @@ 2.16 <dependency> 2.17 <groupId>monetdb</groupId> 2.18 <artifactId>jdbcclient</artifactId> 2.19 - <version>3.4.0</version> 2.20 </dependency> 2.21 2.22 - <!-- <dependency> <groupId>net.sf</groupId> <artifactId>log4jdbc3</artifactId> 2.23 - <version>1.2beta2</version> </dependency> --> 2.24 + <!-- Geotools --> 2.25 + <dependency> 2.26 + <groupId>org.geotools</groupId> 2.27 + <artifactId>gt-epsg-wkt</artifactId> 2.28 + </dependency> 2.29 + 2.30 + <dependency> 2.31 + <groupId>org.geotools</groupId> 2.32 + <artifactId>gt-geometry</artifactId> 2.33 + </dependency> 2.34 + 2.35 + <dependency> 2.36 + <groupId>org.geotools</groupId> 2.37 + <artifactId>gt-data</artifactId> 2.38 + </dependency> 2.39 + 2.40 + <dependency> 2.41 + <groupId>org.geotools</groupId> 2.42 + <artifactId>gt-swing</artifactId> 2.43 + </dependency> 2.44 + 2.45 + <dependency> 2.46 + <groupId>org.opengis</groupId> 2.47 + <artifactId>geoapi-pending</artifactId> 2.48 + </dependency> 2.49 + 2.50 </dependencies> 2.51 2.52 <build>
3.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 3.2 +++ b/runtime/src/main/java/eu/earthobservatory/utils/GeonamesParser.java Wed Jun 20 19:03:13 2012 +0300 3.3 @@ -0,0 +1,383 @@ 3.4 +package eu.earthobservatory.utils; 3.5 + 3.6 +import info.aduna.iteration.CloseableIteration; 3.7 + 3.8 +import java.io.BufferedReader; 3.9 +import java.io.BufferedWriter; 3.10 +import java.io.ByteArrayInputStream; 3.11 +import java.io.File; 3.12 +import java.io.FileInputStream; 3.13 +import java.io.FileNotFoundException; 3.14 +import java.io.FileOutputStream; 3.15 +import java.io.IOException; 3.16 +import java.io.InputStreamReader; 3.17 +import java.io.OutputStreamWriter; 3.18 +import java.io.Serializable; 3.19 +import java.util.ArrayList; 3.20 +import java.util.HashMap; 3.21 +import java.util.List; 3.22 +import java.util.ListIterator; 3.23 +import java.util.Map; 3.24 + 3.25 +import org.geotools.data.DataUtilities; 3.26 +import org.geotools.data.shapefile.ShapefileDataStore; 3.27 +import org.geotools.data.shapefile.ShapefileDataStoreFactory; 3.28 +import org.geotools.data.simple.SimpleFeatureCollection; 3.29 +import org.geotools.data.simple.SimpleFeatureIterator; 3.30 +import org.geotools.data.simple.SimpleFeatureSource; 3.31 +import org.geotools.feature.FeatureCollections; 3.32 +import org.geotools.feature.SchemaException; 3.33 +import org.geotools.feature.simple.SimpleFeatureBuilder; 3.34 +import org.geotools.geometry.jts.JTSFactoryFinder; 3.35 +import org.opengis.feature.simple.SimpleFeature; 3.36 +import org.opengis.feature.simple.SimpleFeatureType; 3.37 +import org.opengis.geometry.MismatchedDimensionException; 3.38 +import org.opengis.referencing.FactoryException; 3.39 +import org.opengis.referencing.NoSuchAuthorityCodeException; 3.40 +import org.opengis.referencing.operation.TransformException; 3.41 +import org.openrdf.model.Namespace; 3.42 +import org.openrdf.model.Resource; 3.43 +import org.openrdf.model.Statement; 3.44 +import org.openrdf.model.URI; 3.45 +import org.openrdf.model.Value; 3.46 +import org.openrdf.model.impl.LiteralImpl; 3.47 +import org.openrdf.model.impl.StatementImpl; 3.48 +import org.openrdf.model.impl.URIImpl; 3.49 +import org.openrdf.repository.Repository; 3.50 +import org.openrdf.repository.RepositoryConnection; 3.51 +import org.openrdf.repository.RepositoryException; 3.52 +import org.openrdf.repository.sail.SailRepository; 3.53 +import org.openrdf.rio.RDFHandler; 3.54 +import org.openrdf.rio.RDFHandlerException; 3.55 +import org.openrdf.rio.RDFParseException; 3.56 +import org.openrdf.rio.RDFWriter; 3.57 +import org.openrdf.rio.turtle.TurtleWriter; 3.58 + 3.59 +import org.openrdf.sail.memory.MemoryStore; 3.60 + 3.61 +import com.vividsolutions.jts.geom.Coordinate; 3.62 +import com.vividsolutions.jts.geom.Geometry; 3.63 +import com.vividsolutions.jts.geom.GeometryFactory; 3.64 +import com.vividsolutions.jts.geom.MultiLineString; 3.65 +import com.vividsolutions.jts.geom.Point; 3.66 +import com.vividsolutions.jts.io.ParseException; 3.67 +import com.vividsolutions.jts.io.WKTReader; 3.68 + 3.69 +public class GeonamesParser { 3.70 + 3.71 + static int pointsOut = 0; 3.72 + static int e = 0, a = 0; 3.73 + 3.74 + public static void main(String[] args) throws IOException, SchemaException, 3.75 + NoSuchAuthorityCodeException, FactoryException, 3.76 + MismatchedDimensionException, TransformException { 3.77 + 3.78 + if (args.length < 2) { 3.79 + System.err.println("Usage: eu.earthobservatory.utils.GeonamesParser <IN_FILE> <OUT_FILE>"); 3.80 + System.exit(0); 3.81 + } 3.82 + final String inFile = args[0]; 3.83 + final String outFile = args[1]; 3.84 + final String baseURI = "http://www.geonames.org"; 3.85 + 3.86 + String greece = "POLYGON((" + 3.87 + "19.2214041948412 39.915644583545,19.2214041948412 39.915644583545,19.181097387078 39.9237059450676," + 3.88 + "19.5519200182563 40.0285036451009,19.6567177185669 39.9156445835922,20.1968289424263 39.9075832219288," + 3.89 + "20.3741788961446 40.1655467910226,20.6805106346546 40.4073876369722,20.8497992263774 40.94749885984," + 3.90 + "21.6962421887069 41.0845420054562,21.8897148655894 41.2538305976083,22.5910533201724 41.2780146817553," + 3.91 + "22.5829919583984 41.4231191894311,23.1150418204718 41.4473032736882,23.5987235131646 41.5279168887397," + 3.92 + "23.896993890336 41.5682236962285,24.3967983060956 41.6568986728098,24.7434368528031 41.5359782495121," + 3.93 + "25.331916245936 41.3989351029729,25.9687638080256 41.4553646333056,26.0574387848902 41.5440396101636," + 3.94 + "25.9284569997944 41.7536350102425,26.1944819306803 41.8423099869814,26.6297954543722 41.7375122866789," + 3.95 + "26.7426545167742 41.2941374019925,26.4685682243736 41.1812783406592,26.5169363940019 40.9716829406332," + 3.96 + "26.0896842326308 40.6008603101938,25.5415116482987 40.0688104490916,26.4927523118004 39.4642083330579," + 3.97 + "26.7265317973552 39.0288948098343,26.4040773361785 38.5693972024431,27.1537839611879 37.8599973865071," + 3.98 + "28.451663172698 36.4734432006192,28.0082882886197 35.6834297705082,29.5560697054562 36.3605841370316," + 3.99 + "29.9671991450442 36.0864978436166,27.0570476273072 34.6596368562464,24.0904665776989 34.5870846050009," + 3.100 + "21.9219603198949 35.538325268111,19.2214041948412 39.915644583545" + 3.101 + "))"; 3.102 + 3.103 + Repository myRepository = new SailRepository(new MemoryStore()); 3.104 + BufferedReader dis = null; 3.105 + RepositoryConnection conn = null; 3.106 + boolean isEven = false; 3.107 + 3.108 + Geometry greeceGeo = null; 3.109 + try { 3.110 + //mbbInWGS84 = new WKTReader().read("POLYGON((-10.6700 34.5000, 31.5500 34.5000, 31.5500 71.0500, -10.6700 71.0500, -10.6700 34.5000))"); 3.111 + greeceGeo = new WKTReader().read(greece); 3.112 + greeceGeo.setSRID(4326); 3.113 + } catch (ParseException e1) { 3.114 + e1.printStackTrace(); 3.115 + } 3.116 + 3.117 + // System.out.println("|--MBB Created--|"); 3.118 + 3.119 + // // -- Display map of Greece and its mbb -- // 3.120 + // SimpleFeatureCollection mbbCollection = createCollection(mbbInWGS84); 3.121 + // MapContext map = new DefaultMapContext(); 3.122 + // map.setTitle("Map"); 3.123 + // map.addLayer(clcFeatureCollection, null); 3.124 + // map.addLayer(mbbCollection, null); 3.125 + // JMapFrame.showMap(map); 3.126 + 3.127 + try { 3.128 + // Initialization of classes used for RDF handling 3.129 + myRepository.initialize(); 3.130 + conn = myRepository.getConnection(); 3.131 + conn.setAutoCommit(true); 3.132 + 3.133 + dis = new BufferedReader(new InputStreamReader( 3.134 + new FileInputStream(new File(inFile)))); 3.135 + BufferedWriter osw = new BufferedWriter(new OutputStreamWriter( 3.136 + new FileOutputStream(outFile), "UTF-8")); 3.137 + // BufferedWriter osw = new BufferedWriter(new 3.138 + // OutputStreamWriter(new FileOutputStream(outFile), "UTF-8")); 3.139 + RDFWriter wr = new TurtleWriter(osw); 3.140 + String line = null; 3.141 + 3.142 + // Adding data to repository 3.143 + wr.startRDF(); 3.144 + while (dis.ready()) { 3.145 + line = dis.readLine(); 3.146 + if (isEven) { 3.147 + try { 3.148 + conn.clear(); 3.149 + //ByteArrayInputStream bis = new ByteArrayInputStream(line.getBytes()); 3.150 + ByteArrayInputStream bis = new ByteArrayInputStream(line.getBytes("UTF-8")); 3.151 + conn.add(bis, baseURI, org.openrdf.rio.RDFFormat.RDFXML); 3.152 + exportGeoname(wr, conn, greeceGeo); 3.153 + } catch (RDFParseException ex) { 3.154 + System.err.println(line); 3.155 + e++; 3.156 + } 3.157 + } else { 3.158 + a++; 3.159 + } 3.160 + isEven = !isEven; 3.161 + } 3.162 + wr.endRDF(); 3.163 + dis.close(); 3.164 + 3.165 + System.out.println("Rejected features: " + e); 3.166 + System.out.println("Parsed features: " + a); 3.167 + System.out.println("Points out of MBB of Greece: " + pointsOut); 3.168 + } catch (RepositoryException e) { 3.169 + e.printStackTrace(); 3.170 + } catch (FileNotFoundException e) { 3.171 + e.printStackTrace(); 3.172 + } catch (IOException e) { 3.173 + e.printStackTrace(); 3.174 + } catch (RDFHandlerException e) { 3.175 + e.printStackTrace(); 3.176 + } 3.177 + 3.178 + } 3.179 + 3.180 + /** 3.181 + * Create a SimpleFeatureCollection with a Geometry 3.182 + * 3.183 + * @param all 3.184 + * @return 3.185 + * @throws SchemaException 3.186 + */ 3.187 + public static SimpleFeatureCollection createCollection(Geometry g) 3.188 + throws SchemaException { 3.189 + 3.190 + SimpleFeatureCollection collection = FeatureCollections.newCollection(); 3.191 + SimpleFeatureType TYPE = DataUtilities.createType("MBB", 3.192 + "location:Polygon:srid=4326"); // 4326 = srid of wgs84 3.193 + SimpleFeatureBuilder featureBuilder = new SimpleFeatureBuilder(TYPE); 3.194 + 3.195 + featureBuilder.add(g); 3.196 + SimpleFeature feature = featureBuilder.buildFeature(null); 3.197 + collection.add(feature); 3.198 + 3.199 + return collection; 3.200 + } 3.201 + 3.202 +// public static Geometry getMBB(String filename) throws IOException, 3.203 +// SchemaException, NoSuchAuthorityCodeException, FactoryException { 3.204 +// SimpleFeatureCollection featureCollection = getFeatureCollection( 3.205 +// filename, "ISO8859-7"); 3.206 +// 3.207 +// SimpleFeatureIterator featureIterator = featureCollection.features(); 3.208 +// 3.209 +// MultiPolygon all = null; 3.210 +// // Iterate features of shp file 3.211 +// while (featureIterator.hasNext()) { 3.212 +// SimpleFeature f = featureIterator.next(); 3.213 +// List<Object> attributes = f.getAttributes(); 3.214 +// 3.215 +// MultiPolygon landItem = (MultiPolygon) attributes.get(0); 3.216 +// 3.217 +// if (all == null) 3.218 +// all = landItem; 3.219 +// else 3.220 +// all = (MultiPolygon) all.union(landItem); 3.221 +// } 3.222 +// 3.223 +// return all.getEnvelope(); 3.224 +// } 3.225 + 3.226 + /** 3.227 + * @param filename 3.228 + * shp file to be opened 3.229 + * @param encoding 3.230 + * encoding of opened shp file 3.231 + * @return a feature collection of opened shp file 3.232 + * @throws IOException 3.233 + * @throws FactoryException 3.234 + * @throws NoSuchAuthorityCodeException 3.235 + */ 3.236 + public static SimpleFeatureCollection getFeatureCollection(String filename, 3.237 + String encoding) throws IOException, NoSuchAuthorityCodeException, 3.238 + FactoryException { 3.239 + // Open file 3.240 + File infile = new File(filename); 3.241 + 3.242 + // Parameters of ShapefileDatastore 3.243 + Map<String, Serializable> params = new HashMap<String, Serializable>(); 3.244 + params.put(ShapefileDataStoreFactory.URLP.key, infile.toURI().toURL()); 3.245 + // params.put("create spatial index", Boolean.TRUE); 3.246 + params.put(ShapefileDataStoreFactory.DBFCHARSET.key, encoding); 3.247 + 3.248 + // -- Create ShapefileDatastore -- // 3.249 + ShapefileDataStoreFactory dataStoreFactory = new ShapefileDataStoreFactory(); 3.250 + ShapefileDataStore shapefile = (ShapefileDataStore) dataStoreFactory 3.251 + .createNewDataStore(params); 3.252 + // shapefile.setStringCharset(Charset.forName("ISO8859-7")); 3.253 + // DataStore shapefile = new ShapefileDataStore( infile.toURI().toURL(), false); 3.254 + 3.255 + // Get feature collection 3.256 + String[] typeName = shapefile.getTypeNames(); 3.257 + SimpleFeatureSource featureSource = shapefile 3.258 + .getFeatureSource(typeName[0]); 3.259 + //SimpleFeatureCollection collection; 3.260 + SimpleFeatureCollection featureCollection = featureSource.getFeatures(); 3.261 + 3.262 + // SimpleFeatureIterator clcFeatureIterator = 3.263 + // clcFeatureCollection.features(); 3.264 + return featureCollection; 3.265 + } 3.266 + 3.267 + // public static void showMap(String filename) throws IOException { 3.268 + // 3.269 + // File file = new File(filename); 3.270 + // 3.271 + // FileDataStore store = FileDataStoreFinder.getDataStore(file); 3.272 + // SimpleFeatureSource featureSource = store.getFeatureSource(); 3.273 + // 3.274 + // // Create a map context and add our shapefile to it 3.275 + // MapContext map = new DefaultMapContext(); 3.276 + // map.setTitle("Quickstart"); 3.277 + // map.addLayer(featureSource, null); 3.278 + // 3.279 + // // Now display the map 3.280 + // JMapFrame.showMap(map); 3.281 + // } 3.282 + 3.283 + /** 3.284 + * @param featureIterator 3.285 + * iterator of features to print 3.286 + * @throws IOException 3.287 + */ 3.288 + public static void printFeatures(SimpleFeatureIterator featureIterator) 3.289 + throws IOException { 3.290 + 3.291 + // Iterate features of shp file 3.292 + while (featureIterator.hasNext()) { 3.293 + SimpleFeature f = featureIterator.next(); 3.294 + List<Object> attributes = f.getAttributes(); 3.295 + 3.296 + for (int i = 0; i < attributes.size(); i++) { 3.297 + // MultiLineString geometry = 3.298 + // (MultiLineString)attributes.get(0); 3.299 + Object attribute = attributes.get(i); 3.300 + if (!(attribute instanceof MultiLineString)) 3.301 + System.out.println(attributes.get(i).toString()); 3.302 + } 3.303 + System.out.println("---------"); 3.304 + } 3.305 + } 3.306 + 3.307 + public static void exportGeoname(RDFHandler handler, 3.308 + RepositoryConnection conn, Geometry greeceGeo) 3.309 + throws RepositoryException, RDFHandlerException { 3.310 + 3.311 + final URI hasGeography = new URIImpl( 3.312 + "http://teleios.di.uoa.gr/ontologies/noaOntology.owl#hasGeography"); 3.313 + final URI geometry = new URIImpl("http://strdf.di.uoa.gr/ontology#WKT"); 3.314 + URI latPredicate = new URIImpl( 3.315 + "http://www.w3.org/2003/01/geo/wgs84_pos#lat"); 3.316 + URI longPredicate = new URIImpl( 3.317 + "http://www.w3.org/2003/01/geo/wgs84_pos#long"); 3.318 + 3.319 + handler.handleNamespace("geo", "http://www.example.org/geo#"); 3.320 + handler.handleNamespace("strdf", "http://strdf.di.uoa.gr/ontology#"); 3.321 + handler.handleNamespace("noa", 3.322 + "http://teleios.di.uoa.gr/ontologies/noaOntology.owl#"); 3.323 + 3.324 + // Export namespace information 3.325 + CloseableIteration<? extends Namespace, RepositoryException> nsIter = conn 3.326 + .getNamespaces(); 3.327 + try { 3.328 + while (nsIter.hasNext()) { 3.329 + Namespace ns = nsIter.next(); 3.330 + handler.handleNamespace(ns.getPrefix(), ns.getName()); 3.331 + } 3.332 + } finally { 3.333 + nsIter.close(); 3.334 + } 3.335 + 3.336 + // Export statements 3.337 + CloseableIteration<? extends Statement, RepositoryException> stIter = conn 3.338 + .getStatements(null, null, null, false); 3.339 + 3.340 + try { 3.341 + Value latV = null, longV = null; 3.342 + Statement st = null; 3.343 + ArrayList<Statement> stL = new ArrayList<Statement>(); 3.344 + Resource sub = null; 3.345 + while (stIter.hasNext()) { 3.346 + st = stIter.next(); 3.347 + URI p = st.getPredicate(); 3.348 + 3.349 + if (p.equals(latPredicate)) { 3.350 + sub = st.getSubject(); 3.351 + latV = st.getObject(); 3.352 + } else if (p.equals(longPredicate)) { 3.353 + longV = st.getObject(); 3.354 + } else { 3.355 + stL.add(st); 3.356 + } 3.357 + } 3.358 + st = new StatementImpl(sub, hasGeography, new LiteralImpl("POINT(" 3.359 + + longV.stringValue() + " " + latV.stringValue() + ")", 3.360 + geometry) // TODO einai swsth h seira, nomizw nai ?? 3.361 + ); 3.362 + stL.add(st); 3.363 + 3.364 + GeometryFactory geometryFactory = JTSFactoryFinder 3.365 + .getGeometryFactory(null); 3.366 + Point point = geometryFactory.createPoint(new Coordinate(Double.parseDouble( 3.367 + //latV.stringValue()), Double.parseDouble(longV.stringValue())) 3.368 + longV.stringValue()), Double.parseDouble(latV.stringValue())) 3.369 + ); 3.370 + // TODO To parapanw nomizw einai swsto edw giati ta pairnei 3.371 + // anapoda?? 3.372 + 3.373 + if (greeceGeo.contains(point)) { 3.374 + ListIterator<Statement> stLI = stL.listIterator(); 3.375 + while (stLI.hasNext()) { 3.376 + handler.handleStatement(stLI.next()); 3.377 + } 3.378 + } else 3.379 + pointsOut++; 3.380 + 3.381 + System.out.println(a + ": " + e + " - " + pointsOut); 3.382 + } finally { 3.383 + stIter.close(); 3.384 + } 3.385 + } 3.386 +} 3.387 \ No newline at end of file