Strabon
changeset 649:f21d6acefe7f temporals
added parser and handler files to runtime/generaldb
author | Konstantina Bereta <Konstantina.Bereta@di.uoa.gr> |
---|---|
date | Tue Oct 23 19:55:28 2012 +0300 (2012-10-23) |
parents | 5f925d5a4885 |
children | 5991b1b45020 |
files | runtime/src/main/java/eu/earthobservatory/runtime/generaldb/ModifiedNTriplesParser.java runtime/src/main/java/eu/earthobservatory/runtime/generaldb/NQuadsParser.java runtime/src/main/java/eu/earthobservatory/runtime/generaldb/NQuadsTranslator.java runtime/src/main/java/eu/earthobservatory/runtime/generaldb/QuadRDFHandler.java |
line diff
1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/runtime/src/main/java/eu/earthobservatory/runtime/generaldb/ModifiedNTriplesParser.java Tue Oct 23 19:55:28 2012 +0300 1.3 @@ -0,0 +1,673 @@ 1.4 +/* 1.5 + * Copyright Aduna (http://www.aduna-software.com/) (c) 1997-2007. 1.6 + * 1.7 + * Licensed under the Aduna BSD-style license. 1.8 + */ 1.9 +package eu.earthobservatory.runtime.generaldb; 1.10 + 1.11 +import java.io.IOException; 1.12 +import java.io.InputStream; 1.13 +import java.io.InputStreamReader; 1.14 +import java.io.Reader; 1.15 +import java.io.UnsupportedEncodingException; 1.16 +import java.text.ParseException; 1.17 +import java.util.regex.Pattern; 1.18 + 1.19 +import org.openrdf.model.Literal; 1.20 +import org.openrdf.model.Resource; 1.21 +import org.openrdf.model.Statement; 1.22 +import org.openrdf.model.URI; 1.23 +import org.openrdf.model.Value; 1.24 +import org.openrdf.model.ValueFactory; 1.25 +import org.openrdf.model.impl.ValueFactoryImpl; 1.26 +import org.openrdf.rio.RDFFormat; 1.27 +import org.openrdf.rio.RDFHandlerException; 1.28 +import org.openrdf.rio.RDFParseException; 1.29 +import org.openrdf.rio.helpers.RDFParserBase; 1.30 +import org.openrdf.rio.ntriples.NTriplesUtil; 1.31 + 1.32 +/** 1.33 + * RDF parser for N-Triples files. A specification of NTriples can be found in 1.34 + * <a href="http://www.w3.org/TR/rdf-testcases/#ntriples">this section</a> of 1.35 + * the RDF Test Cases document. This parser is not thread-safe, therefore its 1.36 + * public methods are synchronized. 1.37 + * 1.38 + * @author Arjohn Kampman 1.39 + */ 1.40 +public class ModifiedNTriplesParser extends RDFParserBase { 1.41 + 1.42 + /*-----------* 1.43 + * Variables * 1.44 + *-----------*/ 1.45 + 1.46 + protected Reader reader; 1.47 + 1.48 + protected int lineNo; 1.49 + 1.50 + protected Resource subject; 1.51 + 1.52 + protected URI predicate; 1.53 + 1.54 + protected Value object; 1.55 + 1.56 + /*--------------* 1.57 + * Constructors * 1.58 + *--------------*/ 1.59 + 1.60 + /** 1.61 + * Creates a new NTriplesParser that will use a {@link ValueFactoryImpl} to 1.62 + * create object for resources, bNodes and literals. 1.63 + */ 1.64 + public ModifiedNTriplesParser() { 1.65 + super(); 1.66 + } 1.67 + 1.68 + /** 1.69 + * Creates a new NTriplesParser that will use the supplied 1.70 + * <tt>ValueFactory</tt> to create RDF model objects. 1.71 + * 1.72 + * @param valueFactory 1.73 + * A ValueFactory. 1.74 + */ 1.75 + public ModifiedNTriplesParser(ValueFactory valueFactory) { 1.76 + super(valueFactory); 1.77 + } 1.78 + 1.79 + /*---------* 1.80 + * Methods * 1.81 + *---------*/ 1.82 + 1.83 + // implements RDFParser.getRDFFormat() 1.84 + public RDFFormat getRDFFormat() { 1.85 + return RDFFormat.NTRIPLES; 1.86 + } 1.87 + 1.88 + /** 1.89 + * Implementation of the <tt>parse(InputStream, String)</tt> method defined 1.90 + * in the RDFParser interface. 1.91 + * 1.92 + * @param in 1.93 + * The InputStream from which to read the data, must not be 1.94 + * <tt>null</tt>. The InputStream is supposed to contain 7-bit 1.95 + * US-ASCII characters, as per the N-Triples specification. 1.96 + * @param baseURI 1.97 + * The URI associated with the data in the InputStream, must not be 1.98 + * <tt>null</tt>. 1.99 + * @throws IOException 1.100 + * If an I/O error occurred while data was read from the InputStream. 1.101 + * @throws RDFParseException 1.102 + * If the parser has found an unrecoverable parse error. 1.103 + * @throws RDFHandlerException 1.104 + * If the configured statement handler encountered an unrecoverable 1.105 + * error. 1.106 + * @throws IllegalArgumentException 1.107 + * If the supplied input stream or base URI is <tt>null</tt>. 1.108 + */ 1.109 + public synchronized void parse(InputStream in, String baseURI) 1.110 + throws IOException, RDFParseException, RDFHandlerException 1.111 + { 1.112 + if (in == null) { 1.113 + throw new IllegalArgumentException("Input stream can not be 'null'"); 1.114 + } 1.115 + // Note: baseURI will be checked in parse(Reader, String) 1.116 + 1.117 + try { 1.118 + parse(new InputStreamReader(in, "US-ASCII"), baseURI); 1.119 + } 1.120 + catch (UnsupportedEncodingException e) { 1.121 + // Every platform should support the US-ASCII encoding... 1.122 + throw new RuntimeException(e); 1.123 + } 1.124 + } 1.125 + 1.126 + /** 1.127 + * Implementation of the <tt>parse(Reader, String)</tt> method defined in the 1.128 + * RDFParser interface. 1.129 + * 1.130 + * @param reader 1.131 + * The Reader from which to read the data, must not be <tt>null</tt>. 1.132 + * @param baseURI 1.133 + * The URI associated with the data in the Reader, must not be 1.134 + * <tt>null</tt>. 1.135 + * @throws IOException 1.136 + * If an I/O error occurred while data was read from the InputStream. 1.137 + * @throws RDFParseException 1.138 + * If the parser has found an unrecoverable parse error. 1.139 + * @throws RDFHandlerException 1.140 + * If the configured statement handler encountered an unrecoverable 1.141 + * error. 1.142 + * @throws IllegalArgumentException 1.143 + * If the supplied reader or base URI is <tt>null</tt>. 1.144 + */ 1.145 + public synchronized void parse(Reader reader, String baseURI) 1.146 + throws IOException, RDFParseException, RDFHandlerException 1.147 + { 1.148 + if (reader == null) { 1.149 + throw new IllegalArgumentException("Reader can not be 'null'"); 1.150 + } 1.151 + if (baseURI == null) { 1.152 + throw new IllegalArgumentException("base URI can not be 'null'"); 1.153 + } 1.154 + 1.155 + rdfHandler.startRDF(); 1.156 + 1.157 + this.reader = reader; 1.158 + lineNo = 1; 1.159 + 1.160 + reportLocation(lineNo, 1); 1.161 + 1.162 + try { 1.163 + int c = reader.read(); 1.164 + c = skipWhitespace(c); 1.165 + 1.166 + while (c != -1) { 1.167 + if (c == '#') { 1.168 + // Comment, ignore 1.169 + c = skipLine(c); 1.170 + } 1.171 + else if (c == '\r' || c == '\n') { 1.172 + // Empty line, ignore 1.173 + c = skipLine(c); 1.174 + } 1.175 + else { 1.176 + c = parseTriple(c); 1.177 + } 1.178 + 1.179 + c = skipWhitespace(c); 1.180 + } 1.181 + } 1.182 + finally { 1.183 + clear(); 1.184 + } 1.185 + 1.186 + rdfHandler.endRDF(); 1.187 + } 1.188 + 1.189 + /** 1.190 + * Reads characters from reader until it finds a character that is not a 1.191 + * space or tab, and returns this last character. In case the end of the 1.192 + * character stream has been reached, -1 is returned. 1.193 + */ 1.194 + protected int skipWhitespace(int c) 1.195 + throws IOException 1.196 + { 1.197 + while (c == ' ' || c == '\t') { 1.198 + c = reader.read(); 1.199 + } 1.200 + 1.201 + return c; 1.202 + } 1.203 + 1.204 + protected int assertLineTerminates(int c) throws IOException, RDFParseException { 1.205 + c = reader.read(); 1.206 + 1.207 + c = skipWhitespace(c); 1.208 + 1.209 + if (c != -1 && c != '\r' && c != '\n') { 1.210 + reportFatalError("content after '.' is not allowed"); 1.211 + } 1.212 + 1.213 + return c; 1.214 + } 1.215 + 1.216 + /** 1.217 + * Reads characters from reader until the first EOL has been read. The first 1.218 + * character after the EOL is returned. In case the end of the character 1.219 + * stream has been reached, -1 is returned. 1.220 + */ 1.221 + protected int skipLine(int c) 1.222 + throws IOException 1.223 + { 1.224 + while (c != -1 && c != '\r' && c != '\n') { 1.225 + c = reader.read(); 1.226 + } 1.227 + 1.228 + // c is equal to -1, \r or \n. In case of a \r, we should 1.229 + // check whether it is followed by a \n. 1.230 + 1.231 + if (c == '\n') { 1.232 + c = reader.read(); 1.233 + 1.234 + lineNo++; 1.235 + 1.236 + reportLocation(lineNo, 1); 1.237 + } 1.238 + else if (c == '\r') { 1.239 + c = reader.read(); 1.240 + 1.241 + if (c == '\n') { 1.242 + c = reader.read(); 1.243 + } 1.244 + 1.245 + lineNo++; 1.246 + 1.247 + reportLocation(lineNo, 1); 1.248 + } 1.249 + 1.250 + return c; 1.251 + } 1.252 + 1.253 + private int parseTriple(int c) 1.254 + throws IOException, RDFParseException, RDFHandlerException 1.255 + { 1.256 + boolean ignoredAnError = false; 1.257 + try 1.258 + { 1.259 + c = parseSubject(c); 1.260 + 1.261 + c = skipWhitespace(c); 1.262 + 1.263 + c = parsePredicate(c); 1.264 + 1.265 + c = skipWhitespace(c); 1.266 + 1.267 + c = parseObject(c); 1.268 + 1.269 + c = skipWhitespace(c); 1.270 + 1.271 + if (c == -1) { 1.272 + throwEOFException(); 1.273 + } 1.274 + else if (c != '.') { 1.275 + reportError("Expected '.', found: " + (char)c); 1.276 + } 1.277 + 1.278 + c = assertLineTerminates(c); 1.279 + } 1.280 + catch(RDFParseException rdfpe) 1.281 + { 1.282 + if(stopAtFirstError()) 1.283 + { 1.284 + throw rdfpe; 1.285 + } 1.286 + else 1.287 + { 1.288 + ignoredAnError = true; 1.289 + } 1.290 + } 1.291 + 1.292 + c = skipLine(c); 1.293 + 1.294 + if(!ignoredAnError) 1.295 + { 1.296 + Statement st = createStatement(subject, predicate, object); 1.297 + rdfHandler.handleStatement(st); 1.298 + } 1.299 + 1.300 + subject = null; 1.301 + predicate = null; 1.302 + object = null; 1.303 + 1.304 + return c; 1.305 + } 1.306 + 1.307 + protected int parseSubject(int c) 1.308 + throws IOException, RDFParseException 1.309 + { 1.310 + StringBuilder sb = new StringBuilder(100); 1.311 + 1.312 + // subject is either an uriref (<foo://bar>) or a nodeID (_:node1) 1.313 + if (c == '<') { 1.314 + // subject is an uriref 1.315 + c = parseUriRef(c, sb); 1.316 + subject = createURI(sb.toString()); 1.317 + } 1.318 + else if (c == '_') { 1.319 + // subject is a bNode 1.320 + c = parseNodeID(c, sb); 1.321 + subject = createBNode(sb.toString()); 1.322 + } 1.323 + else if (c == -1) { 1.324 + throwEOFException(); 1.325 + } 1.326 + else { 1.327 + reportFatalError("Expected '<' or '_', found: " + (char)c); 1.328 + } 1.329 + 1.330 + return c; 1.331 + } 1.332 + 1.333 + protected int parsePredicate(int c) 1.334 + throws IOException, RDFParseException 1.335 + { 1.336 + StringBuilder sb = new StringBuilder(100); 1.337 + 1.338 + // predicate must be an uriref (<foo://bar>) 1.339 + if (c == '<') { 1.340 + // predicate is an uriref 1.341 + c = parseUriRef(c, sb); 1.342 + predicate = createURI(sb.toString()); 1.343 + } 1.344 + else if (c == -1) { 1.345 + throwEOFException(); 1.346 + } 1.347 + else { 1.348 + reportFatalError("Expected '<', found: " + (char)c); 1.349 + } 1.350 + 1.351 + return c; 1.352 + } 1.353 + 1.354 + protected int parseObject(int c) 1.355 + throws IOException, RDFParseException 1.356 + { 1.357 + StringBuilder sb = getBuffer(); 1.358 + 1.359 + // object is either an uriref (<foo://bar>), a nodeID (_:node1) or a 1.360 + // literal ("foo"-en or "1"^^<xsd:integer>). 1.361 + if (c == '<') { 1.362 + // object is an uriref 1.363 + c = parseUriRef(c, sb); 1.364 + object = createURI(sb.toString()); 1.365 + } 1.366 + else if (c == '_') { 1.367 + // object is a bNode 1.368 + c = parseNodeID(c, sb); 1.369 + object = createBNode(sb.toString()); 1.370 + } 1.371 + else if (c == '"') { 1.372 + // object is a literal 1.373 + StringBuilder lang = getLanguageTagBuffer(); 1.374 + StringBuilder datatype = getDatatypeUriBuffer(); 1.375 + c = parseLiteral(c, sb, lang, datatype); 1.376 + object = createLiteral(sb.toString(), lang.toString(), datatype.toString()); 1.377 + } 1.378 + else if (c == -1) { 1.379 + throwEOFException(); 1.380 + } 1.381 + else { 1.382 + reportFatalError("Expected '<', '_' or '\"', found: " + (char)c); 1.383 + } 1.384 + 1.385 + return c; 1.386 + } 1.387 + 1.388 + protected int parseUriRef(int c, StringBuilder uriRef) 1.389 + throws IOException, RDFParseException 1.390 + { 1.391 + assert c == '<' : "Supplied char should be a '<', is: " + c; 1.392 + 1.393 + // Read up to the next '>' character 1.394 + c = reader.read(); 1.395 + while (c != '>') { 1.396 + if (c == -1) { 1.397 + throwEOFException(); 1.398 + } 1.399 + uriRef.append((char)c); 1.400 + c = reader.read(); 1.401 + } 1.402 + 1.403 + // c == '>', read next char 1.404 + c = reader.read(); 1.405 + 1.406 + return c; 1.407 + } 1.408 + 1.409 + protected int parseLiteral(int c, StringBuilder literal) 1.410 + throws IOException, RDFParseException 1.411 + { 1.412 + // assert c == '<' : "Supplied char should be a '<', is: " + c; 1.413 + // Read up to the next '>' character 1.414 + if (c == '"')literal.append((char)c); 1.415 + c = reader.read(); 1.416 + while (c != '>') { 1.417 + if (c == -1) { 1.418 + throwEOFException(); 1.419 + } 1.420 + literal.append((char)c); 1.421 + c = reader.read(); 1.422 + } 1.423 + literal.append((char)c); 1.424 + c = reader.read(); 1.425 + // c == '>', read next char c = reader.read(); 1.426 + 1.427 + return c; 1.428 + } 1.429 + 1.430 + protected int parseNodeID(int c, StringBuilder name) 1.431 + throws IOException, RDFParseException 1.432 + { 1.433 + assert c == '_' : "Supplied char should be a '_', is: " + c; 1.434 + 1.435 + c = reader.read(); 1.436 + if (c == -1) { 1.437 + throwEOFException(); 1.438 + } 1.439 + else if (c != ':') { 1.440 + reportError("Expected ':', found: " + (char)c); 1.441 + } 1.442 + 1.443 + c = reader.read(); 1.444 + if (c == -1) { 1.445 + throwEOFException(); 1.446 + } 1.447 + else if (!NTriplesUtil.isLetter(c)) { 1.448 + reportError("Expected a letter, found: " + (char)c); 1.449 + } 1.450 + name.append((char)c); 1.451 + 1.452 + // Read all following letter and numbers, they are part of the name 1.453 + c = reader.read(); 1.454 + while (c != -1 && NTriplesUtil.isLetterOrNumber(c)) { 1.455 + name.append((char)c); 1.456 + c = reader.read(); 1.457 + } 1.458 + 1.459 + return c; 1.460 + } 1.461 + 1.462 + private int parseLiteral(int c, StringBuilder value, StringBuilder lang, StringBuilder datatype) 1.463 + throws IOException, RDFParseException 1.464 + { 1.465 + assert c == '"' : "Supplied char should be a '\"', is: " + c; 1.466 + 1.467 + // Read up to the next '"' character 1.468 + c = reader.read(); 1.469 + while (c != '"') { 1.470 + if (c == -1) { 1.471 + throwEOFException(); 1.472 + } 1.473 + value.append((char)c); 1.474 + 1.475 + if (c == '\\') { 1.476 + // This escapes the next character, which might be a double quote 1.477 + c = reader.read(); 1.478 + if (c == -1) { 1.479 + throwEOFException(); 1.480 + } 1.481 + value.append((char)c); 1.482 + } 1.483 + 1.484 + c = reader.read(); 1.485 + } 1.486 + 1.487 + // c == '"', read next char 1.488 + c = reader.read(); 1.489 + 1.490 + if (c == '@') { 1.491 + // Read language 1.492 + c = reader.read(); 1.493 + while (c != -1 && c != '.' && c != '^' && c != ' ' && c != '\t') { 1.494 + lang.append((char)c); 1.495 + c = reader.read(); 1.496 + } 1.497 + } 1.498 + else if (c == '^') { 1.499 + // Read datatype 1.500 + c = reader.read(); 1.501 + 1.502 + // c should be another '^' 1.503 + if (c == -1) { 1.504 + throwEOFException(); 1.505 + } 1.506 + else if (c != '^') { 1.507 + reportError("Expected '^', found: " + (char)c); 1.508 + } 1.509 + 1.510 + c = reader.read(); 1.511 + 1.512 + // c should be a '<' 1.513 + if (c == -1) { 1.514 + throwEOFException(); 1.515 + } 1.516 + else if (c != '<') { 1.517 + reportError("Expected '<', found: " + (char)c); 1.518 + } 1.519 + 1.520 + c = parseUriRef(c, datatype); 1.521 + } 1.522 + 1.523 + return c; 1.524 + } 1.525 + 1.526 + @Override 1.527 + public URI createURI(String uri) 1.528 + throws RDFParseException 1.529 + { 1.530 + try { 1.531 + uri = NTriplesUtil.unescapeString(uri); 1.532 + } 1.533 + catch (IllegalArgumentException e) { 1.534 + reportError(e.getMessage()); 1.535 + } 1.536 + 1.537 + return super.createURI(uri); 1.538 + } 1.539 + 1.540 + protected Literal createLiteral(String label, String lang, String datatype) 1.541 + throws RDFParseException 1.542 + { 1.543 + try { 1.544 + label = NTriplesUtil.unescapeString(label); 1.545 + } 1.546 + catch (IllegalArgumentException e) { 1.547 + reportError(e.getMessage()); 1.548 + } 1.549 + 1.550 + if (lang.length() == 0) { 1.551 + lang = null; 1.552 + } 1.553 + 1.554 + if (datatype.length() == 0) { 1.555 + datatype = null; 1.556 + } 1.557 + 1.558 + URI dtURI = null; 1.559 + if (datatype != null) { 1.560 + dtURI = createURI(datatype); 1.561 + } 1.562 + 1.563 + return super.createLiteral(label, lang, dtURI); 1.564 + } 1.565 + 1.566 + /** 1.567 + * Overrides {@link RDFParserBase#reportWarning(String)}, adding line number 1.568 + * information to the error. 1.569 + */ 1.570 + @Override 1.571 + protected void reportWarning(String msg) { 1.572 + reportWarning(msg, lineNo, -1); 1.573 + } 1.574 + 1.575 + /** 1.576 + * Overrides {@link RDFParserBase#reportError(String)}, adding line number 1.577 + * information to the error. 1.578 + */ 1.579 + @Override 1.580 + protected void reportError(String msg) 1.581 + throws RDFParseException 1.582 + { 1.583 + reportError(msg, lineNo, -1); 1.584 + } 1.585 + 1.586 + /** 1.587 + * Overrides {@link RDFParserBase#reportFatalError(String)}, adding line 1.588 + * number information to the error. 1.589 + */ 1.590 + @Override 1.591 + protected void reportFatalError(String msg) 1.592 + throws RDFParseException 1.593 + { 1.594 + reportFatalError(msg, lineNo, -1); 1.595 + } 1.596 + 1.597 + /** 1.598 + * Overrides {@link RDFParserBase#reportFatalError(Exception)}, adding line 1.599 + * number information to the error. 1.600 + */ 1.601 + @Override 1.602 + protected void reportFatalError(Exception e) 1.603 + throws RDFParseException 1.604 + { 1.605 + reportFatalError(e, lineNo, -1); 1.606 + } 1.607 + 1.608 + protected void throwEOFException() 1.609 + throws RDFParseException 1.610 + { 1.611 + throw new RDFParseException("Unexpected end of file"); 1.612 + } 1.613 + 1.614 + /** 1.615 + * Return a buffer of zero length and non-zero capacity. The same buffer is 1.616 + * reused for each thing which is parsed. This reduces the heap churn 1.617 + * substantially. However, you have to watch out for side-effects and convert 1.618 + * the buffer to a {@link String} before the buffer is reused. 1.619 + * 1.620 + * @param capacityIsIgnored 1.621 + * @return 1.622 + */ 1.623 + private StringBuilder getBuffer() { 1.624 + buffer.setLength(0); 1.625 + return buffer; 1.626 + } 1.627 + 1.628 + private final StringBuilder buffer = new StringBuilder(100); 1.629 + 1.630 + /** 1.631 + * Return a buffer for the use of parsing literal language tags. The buffer 1.632 + * is of zero length and non-zero capacity. The same buffer is reused for 1.633 + * each tag which is parsed. This reduces the heap churn substantially. 1.634 + * However, you have to watch out for side-effects and convert the buffer to 1.635 + * a {@link String} before the buffer is reused. 1.636 + * 1.637 + * @param capacityIsIgnored 1.638 + * @return 1.639 + */ 1.640 + private StringBuilder getLanguageTagBuffer() { 1.641 + languageTagBuffer.setLength(0); 1.642 + return languageTagBuffer; 1.643 + } 1.644 + 1.645 + private final StringBuilder languageTagBuffer = new StringBuilder(8); 1.646 + 1.647 + /** 1.648 + * Return a buffer for the use of parsing literal datatype URIs. The buffer 1.649 + * is of zero length and non-zero capacity. The same buffer is reused for 1.650 + * each datatype which is parsed. This reduces the heap churn substantially. 1.651 + * However, you have to watch out for side-effects and convert the buffer to 1.652 + * a {@link String} before the buffer is reused. 1.653 + * 1.654 + * @param capacityIsIgnored 1.655 + * @return 1.656 + */ 1.657 + private StringBuilder getDatatypeUriBuffer() { 1.658 + datatypeUriBuffer.setLength(0); 1.659 + return datatypeUriBuffer; 1.660 + } 1.661 + 1.662 + private final StringBuilder datatypeUriBuffer = new StringBuilder(40); 1.663 + 1.664 + @Override 1.665 + protected void clear() { 1.666 + super.clear(); 1.667 + // get rid of anything large left in the buffers. 1.668 + buffer.setLength(0); 1.669 + buffer.trimToSize(); 1.670 + languageTagBuffer.setLength(0); 1.671 + languageTagBuffer.trimToSize(); 1.672 + datatypeUriBuffer.setLength(0); 1.673 + datatypeUriBuffer.trimToSize(); 1.674 + } 1.675 + 1.676 +}
2.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 2.2 +++ b/runtime/src/main/java/eu/earthobservatory/runtime/generaldb/NQuadsParser.java Tue Oct 23 19:55:28 2012 +0300 2.3 @@ -0,0 +1,292 @@ 2.4 +package eu.earthobservatory.runtime.generaldb; 2.5 + 2.6 +import org.openrdf.model.Resource; 2.7 +import org.openrdf.model.Statement; 2.8 +import org.openrdf.rio.RDFFormat; 2.9 +import org.openrdf.rio.RDFHandlerException; 2.10 +import org.openrdf.rio.RDFParseException; 2.11 + 2.12 +import java.io.IOException; 2.13 +import java.io.InputStream; 2.14 +import java.io.InputStreamReader; 2.15 +import java.io.Reader; 2.16 +import java.io.UnsupportedEncodingException; 2.17 +import java.text.DateFormat; 2.18 +import java.text.ParseException; 2.19 +import java.text.SimpleDateFormat; 2.20 +import java.util.Date; 2.21 +import java.util.regex.Pattern; 2.22 + 2.23 +import javax.naming.StringRefAddr; 2.24 + 2.25 +/** 2.26 + * RDFParser implementation for the N-Quads RDF format. 2.27 + * <p/> 2.28 + * Changes made to Aduna's N-Triple parser: 2.29 + * 1) "final" removed from NTriplesParser.getRDFFormat 2.30 + * 2) private member variables made public: reader, lineno, subject, predcate, object 2.31 + * 3) private methods: skipWhitespace, skipLine, parseSubject, parsePredicate, parseObject, throwEOFException 2.32 + * 2.33 + * @author Joshua Shinavier (http://fortytwo.net). Builds on code by Aduna. 2.34 + */ 2.35 +public class NQuadsParser extends ModifiedNTriplesParser { 2.36 + protected Resource context; 2.37 + protected String validTimeLiteral; 2.38 + 2.39 + /* 2.40 + // FIXME: delete me 2.41 + public static void main(final String[] args) throws Exception { 2.42 + String baseURI = "http://example.org/bogusBaseURI/"; 2.43 + 2.44 + Sail sail = new NativeStore(new File("/tmp/btcSmallNativeStore")); 2.45 + sail.initialize(); 2.46 + try { 2.47 + Repository repo = new SailRepository(sail); 2.48 + RepositoryConnection conn = repo.getConnection(); 2.49 + try { 2.50 + InputStream is = new FileInputStream( 2.51 + new File("/Users/josh/datasets/btc/btc-2009-small.nq")); 2.52 + try { 2.53 + RDFParser parser = new NQuadsParser(); 2.54 + parser.setRDFHandler(new RDFInserter(conn)); 2.55 + parser.parse(is, baseURI); 2.56 + } finally { 2.57 + is.close(); 2.58 + } 2.59 + } finally { 2.60 + conn.close(); 2.61 + } 2.62 + } finally { 2.63 + sail.shutDown(); 2.64 + } 2.65 + } 2.66 + */ 2.67 + 2.68 + @Override 2.69 + public RDFFormat getRDFFormat() { 2.70 + return RDFFormat.NQUADS; 2.71 + } 2.72 + 2.73 + @Override 2.74 + public void parse(final InputStream inputStream, 2.75 + final String baseURI) throws IOException, RDFParseException, RDFHandlerException { 2.76 + if (inputStream == null) { 2.77 + throw new IllegalArgumentException("Input stream can not be 'null'"); 2.78 + } 2.79 + // Note: baseURI will be checked in parse(Reader, String) 2.80 + 2.81 + try { 2.82 + parse(new InputStreamReader(inputStream, "US-ASCII"), baseURI); 2.83 + } catch (UnsupportedEncodingException e) { 2.84 + // Every platform should support the US-ASCII encoding... 2.85 + throw new RuntimeException(e); 2.86 + } 2.87 + } 2.88 + 2.89 + @Override 2.90 + public void parse(final Reader reader, 2.91 + final String baseURI) throws IOException, RDFParseException, RDFHandlerException { 2.92 + if (reader == null) { 2.93 + throw new IllegalArgumentException("Reader can not be 'null'"); 2.94 + } 2.95 + if (baseURI == null) { 2.96 + throw new IllegalArgumentException("base URI can not be 'null'"); 2.97 + } 2.98 + 2.99 + rdfHandler.startRDF(); 2.100 + 2.101 + this.reader = reader; 2.102 + lineNo = 1; 2.103 + 2.104 + reportLocation(lineNo, 1); 2.105 + 2.106 + try { 2.107 + int c = reader.read(); 2.108 + c = skipWhitespace(c); 2.109 + 2.110 + while (c != -1) { 2.111 + if (c == '#') { 2.112 + // Comment, ignore 2.113 + c = skipLine(c); 2.114 + } else if (c == '\r' || c == '\n') { 2.115 + // Empty line, ignore 2.116 + c = skipLine(c); 2.117 + } else { 2.118 + c = parseQuad(c); 2.119 + } 2.120 + 2.121 + c = skipWhitespace(c); 2.122 + } 2.123 + } catch (ParseException e) { 2.124 + // TODO Auto-generated catch block 2.125 + e.printStackTrace(); 2.126 + } finally { 2.127 + clear(); 2.128 + } 2.129 + 2.130 + rdfHandler.endRDF(); 2.131 + } 2.132 + 2.133 + private int parseQuad(int c) 2.134 + throws IOException, RDFParseException, RDFHandlerException, ParseException { 2.135 + 2.136 + boolean ignoredAnError = false; 2.137 + try 2.138 + { 2.139 + c = parseSubject(c); 2.140 + 2.141 + c = skipWhitespace(c); 2.142 + 2.143 + c = parsePredicate(c); 2.144 + 2.145 + c = skipWhitespace(c); 2.146 + 2.147 + c = parseObject(c); 2.148 + 2.149 + c = skipWhitespace(c); 2.150 + 2.151 + // Context is not required 2.152 + if (c != '.') { 2.153 + c = parseContext(c); 2.154 + c = skipWhitespace(c); 2.155 + } 2.156 + if (c == -1) { 2.157 + throwEOFException(); 2.158 + } else if (c != '.') { 2.159 + reportFatalError("Expected '.', found: " + (char) c); 2.160 + } 2.161 + 2.162 + c = assertLineTerminates(c); 2.163 + } 2.164 + catch(RDFParseException rdfpe) 2.165 + { 2.166 + if(stopAtFirstError()) 2.167 + { 2.168 + throw rdfpe; 2.169 + } 2.170 + else 2.171 + { 2.172 + ignoredAnError = true; 2.173 + } 2.174 + } 2.175 + 2.176 + c = skipLine(c); 2.177 + 2.178 + if(!ignoredAnError) 2.179 + { 2.180 + Statement st = createStatement(subject, predicate, object, context); 2.181 + rdfHandler.handleStatement(st); 2.182 + } 2.183 + 2.184 + subject = null; 2.185 + predicate = null; 2.186 + object = null; 2.187 + context = null; 2.188 + 2.189 + return c; 2.190 + } 2.191 + 2.192 + public Resource createValidTimeURI(String sb) throws ParseException, RDFParseException 2.193 + { 2.194 + String strdf = "<http://strdf.di.uoa.gr/ontology#validTime"; 2.195 + validTimeLiteral=sb; 2.196 + System.out.println("LITERAL RETURNED:"+ sb); 2.197 + //if(Pattern.matches("[*,*]\"^^<http://strdf.di.uoa.gr/ontology#validTime>", sb.toString())) 2.198 + // if( Pattern.matches("*va*", sb.toString())) 2.199 + if(sb.toString().contains("^^<http://strdf.di.uoa.gr/ontology#validTime>")) 2.200 + { System.out.println("THIS IS A VALID TIME LITERAL"); 2.201 + String[] splits = sb.toString().split(","); 2.202 + System.out.println("split1= "+splits[0].toString()); 2.203 + System.out.println("split2= "+splits[1].toString()); 2.204 +// /String[] splash1 = splits[0].split("["); 2.205 + int i = splits[0].indexOf('['); 2.206 + String element1 = splits[0].substring(++i); 2.207 + String[] splash2 = splits[1].split("]"); 2.208 + System.out.println("element1 ="+ element1); 2.209 + System.out.println("splash2 ="+ splash2[0]); 2.210 + //context= createURI(sb.toString()); 2.211 + DateFormat dateformat = DateFormat.getDateTimeInstance(DateFormat.MEDIUM, 2.212 + DateFormat.SHORT); 2.213 + int syn = element1.indexOf('+'); 2.214 + String startDate = element1.substring(0,syn); 2.215 + syn = element1.indexOf('+'); 2.216 + String endDate = element1.substring(0,syn); 2.217 + //String endDate[] = splash2[0].split("+"); 2.218 + System.out.println("Starting date: "+ startDate); 2.219 + System.out.println("Ending date: "+ endDate); 2.220 + SimpleDateFormat format = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); 2.221 + Date start = format.parse(startDate); 2.222 + Date end = format.parse(endDate); 2.223 + System.out.println("Starting date: "+ start.toString()); 2.224 + System.out.println("Ending date: "+ end.toString()); 2.225 + String uri = strdf+ startDate+"_"+ endDate+ "_" +format.getTimeZone().getID()+ ">"; 2.226 + Resource cont = createURI(uri); 2.227 + return cont; 2.228 + } 2.229 + return null; 2.230 + } 2.231 + 2.232 + protected int parseContext(int c) 2.233 + throws IOException, RDFParseException, ParseException { 2.234 + // FIXME: context (in N-Quads) can be a literal 2.235 + StringBuilder sb = new StringBuilder(100); 2.236 + 2.237 + // subject is either an uriref (<foo://bar>) or a nodeID (_:node1) 2.238 + if (c == '<') { 2.239 + // subject is an uriref 2.240 + System.out.println("PARSEURI"); 2.241 + c = parseUriRef(c, sb); 2.242 + context = createURI(sb.toString()); 2.243 + } else if (c == '_') { 2.244 + // subject is a bNode 2.245 + c = parseNodeID(c, sb); 2.246 + System.out.println("PARSENODE"); 2.247 + context = createBNode(sb.toString()); 2.248 + }else if(c == '"'){ 2.249 + System.out.println("GOING TO PARSE THE LITERAL"); 2.250 + c = parseLiteral(c, sb); 2.251 + validTimeLiteral=sb.toString(); 2.252 + context = createURI(sb.toString()); 2.253 + //context= createValidTimeURI(sb.toString()); 2.254 + /*System.out.println("LITERAL RETURNED:"+ sb); 2.255 + //if(Pattern.matches("[*,*]\"^^<http://strdf.di.uoa.gr/ontology#validTime>", sb.toString())) 2.256 + // if( Pattern.matches("*va*", sb.toString())) 2.257 + if(sb.toString().contains("^^<http://strdf.di.uoa.gr/ontology#validTime>")) 2.258 + { System.out.println("THIS IS A VALID TIME LITERAL"); 2.259 + String[] splits = sb.toString().split(","); 2.260 + System.out.println("split1= "+splits[0].toString()); 2.261 + System.out.println("split2= "+splits[1].toString()); 2.262 +// /String[] splash1 = splits[0].split("["); 2.263 + int i = splits[0].indexOf('['); 2.264 + String element1 = splits[0].substring(++i); 2.265 + String[] splash2 = splits[1].split("]"); 2.266 + System.out.println("element1 ="+ element1); 2.267 + System.out.println("splash2 ="+ splash2[0]); 2.268 + //context= createURI(sb.toString()); 2.269 + DateFormat dateformat = DateFormat.getDateTimeInstance(DateFormat.MEDIUM, 2.270 + DateFormat.SHORT); 2.271 + int syn = element1.indexOf('+'); 2.272 + String startDate = element1.substring(0,syn); 2.273 + syn = element1.indexOf('+'); 2.274 + String endDate = element1.substring(0,syn); 2.275 + //String endDate[] = splash2[0].split("+"); 2.276 + System.out.println("Starting date: "+ startDate); 2.277 + System.out.println("Ending date: "+ endDate); 2.278 + SimpleDateFormat format = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); 2.279 + Date start = format.parse(startDate); 2.280 + Date end = format.parse(endDate); 2.281 + System.out.println("Starting date: "+ start.toString()); 2.282 + System.out.println("Ending date: "+ end.toString()); 2.283 + String uri = strdf+ startDate+"_"+ endDate+ "_" +format.getTimeZone().getID()+ ">"; 2.284 + context = createURI(uri); 2.285 + }*/ 2.286 + 2.287 + }else if (c == -1) { 2.288 + throwEOFException(); 2.289 + } else { 2.290 + reportFatalError("Expected '<' or '_', found: " + (char) c); 2.291 + } 2.292 + 2.293 + return c; 2.294 + } 2.295 +} 2.296 \ No newline at end of file
3.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 3.2 +++ b/runtime/src/main/java/eu/earthobservatory/runtime/generaldb/NQuadsTranslator.java Tue Oct 23 19:55:28 2012 +0300 3.3 @@ -0,0 +1,77 @@ 3.4 +package eu.earthobservatory.runtime.generaldb; 3.5 + 3.6 +import java.io.IOException; 3.7 +import java.io.InputStream; 3.8 +import java.io.InputStreamReader; 3.9 +import java.util.Collection; 3.10 + 3.11 +import org.junit.Assert; 3.12 +import org.openrdf.model.Statement; 3.13 +import org.openrdf.rio.RDFHandlerException; 3.14 +import org.openrdf.rio.RDFParseException; 3.15 +import org.openrdf.rio.helpers.StatementCollector; 3.16 + 3.17 +import net.fortytwo.sesametools.nquads.NQuadsParser; 3.18 +import net.fortytwo.sesametools.nquads.NQuadsFormat; 3.19 + 3.20 +public class NQuadsTranslator { 3.21 + 3.22 + private NQuadsParser parser; 3.23 + private TranslateRDFHandler rdfHandler; 3.24 + 3.25 + private class TranslateRDFHandler extends StatementCollector { 3.26 + 3.27 + 3.28 + 3.29 + @Override 3.30 + public void startRDF() throws RDFHandlerException { 3.31 + super.startRDF(); 3.32 + } 3.33 + 3.34 + @Override 3.35 + public void endRDF() throws RDFHandlerException { 3.36 + super.endRDF(); 3.37 + } 3.38 + 3.39 + @Override 3.40 + public void handleStatement(Statement statement) { 3.41 + super.handleStatement(statement); 3.42 + //logger.debug(statement.toString()); 3.43 + } 3.44 + 3.45 + public TranslateRDFHandler() { 3.46 + super(); 3.47 + } 3.48 + 3.49 + 3.50 + 3.51 + } 3.52 + 3.53 + public NQuadsTranslator() { 3.54 + super(); 3.55 + this.parser = new NQuadsParser(); 3.56 + this.rdfHandler = new TranslateRDFHandler(); 3.57 + } 3.58 + 3.59 + public Collection<Statement> translate(InputStream is,String baseURI) 3.60 + { 3.61 + Collection<Statement> statements = null; 3.62 + TranslateRDFHandler handler = new TranslateRDFHandler(); 3.63 + parser.setRDFHandler(handler); 3.64 + try { 3.65 + parser.parse(is, "http://test.base.uri"); 3.66 + } catch (RDFParseException e) { 3.67 + // TODO Auto-generated catch block 3.68 + e.printStackTrace(); 3.69 + } catch (RDFHandlerException e) { 3.70 + // TODO Auto-generated catch block 3.71 + e.printStackTrace(); 3.72 + } catch (IOException e) { 3.73 + // TODO Auto-generated catch block 3.74 + e.printStackTrace(); 3.75 + } 3.76 + statements= handler.getStatements(); 3.77 + return statements; 3.78 + } 3.79 + 3.80 +}
4.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 4.2 +++ b/runtime/src/main/java/eu/earthobservatory/runtime/generaldb/QuadRDFHandler.java Tue Oct 23 19:55:28 2012 +0300 4.3 @@ -0,0 +1,48 @@ 4.4 +package eu.earthobservatory.runtime.generaldb; 4.5 + 4.6 +import java.text.ParseException; 4.7 + 4.8 +import org.junit.Assert; 4.9 +import org.openrdf.model.Resource; 4.10 +import org.openrdf.model.Statement; 4.11 +import org.openrdf.model.URI; 4.12 +import org.openrdf.rio.RDFHandlerException; 4.13 +import org.openrdf.rio.RDFParseException; 4.14 +import org.openrdf.rio.helpers.StatementCollector; 4.15 + 4.16 +public class QuadRDFHandler extends StatementCollector { 4.17 + 4.18 + private StringBuffer triples = new StringBuffer(1024); 4.19 + 4.20 + 4.21 + @Override 4.22 + public void startRDF() throws RDFHandlerException { 4.23 + super.startRDF(); 4.24 + triples.append("\n"); 4.25 + } 4.26 + 4.27 + public StringBuffer getTriples() 4.28 + { 4.29 + return triples; 4.30 + }; 4.31 + 4.32 + @Override 4.33 + public void endRDF() throws RDFHandlerException { 4.34 + super.endRDF(); 4.35 + } 4.36 + 4.37 + @Override 4.38 + public void handleStatement(Statement st) { 4.39 + //super.handleStatement(st); 4.40 + if(st.getContext().toString().contains("^^<http://strdf.di.uoa.gr/ontology#validTime>")) 4.41 + { System.out.println("THIS IS A VALID TIME LITERAL"); 4.42 + String validTimeLiteral = st.getContext().toString(); 4.43 + String triple = st.getContext().toString() + " <http://strdf.di.uoa.gr/ontology#hasValidTime> "+ st.getContext().toString()+ " .\n" ; 4.44 + triples.append(triple); 4.45 + } 4.46 + super.handleStatement(st); 4.47 + } 4.48 + 4.49 + } 4.50 + 4.51 +