Changeset 3977
- Timestamp:
- 03/06/08 14:10:22 (13 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
TI01-discovery/branches/ws-Discovery2-upgrade/src/ndg/services/discovery/SearchAgent.java
r3965 r3977 4 4 import java.util.Iterator; 5 5 import java.util.Vector; 6 import java.util.Properties; 6 import java.text.SimpleDateFormat; 7 import java.util.ArrayList; 7 8 import java.math.BigInteger; // needed because xmlbeans uses this for xsd:positiveInteger 8 9 import java.math.BigDecimal; // needed because xmlbeans uses this for xsd:decimal 9 10 import java.util.Calendar; // needed because xmlbeans uses this for xsd:date 10 11 import java.util.logging.Logger; 11 import java.util.regex.Matcher;12 import java.text.DecimalFormat;13 import java.text.SimpleDateFormat;14 12 15 13 /** … … 20 18 public class SearchAgent 21 19 { 22 23 20 private static Logger logger = Logger.getLogger(SearchAgent.class.getName()); 24 21 25 private static Properties properties = new ServiceProperties().getProperties();26 27 22 // Internal defaults 28 29 23 private String termType = null; 30 24 31 25 // Initialise input parameters, setting values to internal defaults 32 // private int resultId = 0;33 26 private BigInteger start = new BigInteger("0"); // what row of results to start retrieval from 34 27 private BigInteger howMany = new BigInteger("30"); // how many results to display in result set … … 36 29 private String orderByField = null; // representation used in WSDL e.g. "date", "dataCentre" 37 30 private String orderByDirection = "ASC"; //default orderBy direction 38 // TODO ...add orderByDirection (and to WSDL)39 31 40 32 Vector<String> scopes = new Vector<String>(); 41 private String spatialOperator = "overlaps";33 private String spatialOperator = DiscoveryServiceSkeleton.OVERLAPS_OPERATOR_TYPE; 42 34 43 35 private BigDecimal limitWest; … … 51 43 52 44 private SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd"); 53 54 // info on the DB table structure55 private final String ORIGINAL_DOCUMENT_TABLE ="ORIGINAL_DOCUMENT";56 private final String SPATIAL_DATA_TABLE ="SPATIAL_DATA";57 private final String TEMPORAL_DATA_TABLE ="TEMPORAL_DATA";58 private final String SPATIAL_TEMPORAL_DATA_TABLE ="SPATIAL_TEMPORAL_DATA";59 60 // sql fragments to use when creating sql command61 private final String WHERE_STATEMENT = " WHERE ";62 private final String LIMIT_STATEMENT = " LIMIT ";63 private final String OFFSET_STATEMENT = " OFFSET ";64 private final String ORDERBY_STATEMENT = " ORDER BY ";65 45 66 46 /** … … 145 125 public void addNewScope(String s) 146 126 { 127 s = s.replace("_", DiscoveryServiceSkeleton.UNDERSCORE_REPLACEMENT); 147 128 this.scopes.add(s); 148 129 } … … 158 139 159 140 /** 160 * Sets north bound of BoundingBox. 161 * <p>Value should be BigDecimal. 162 * @param f value of north bound of search box 163 */ 164 165 /** 166 * Returns scopes as space-separated String 167 * @return String containing scope keywords separated by spaces 141 * Returns scopes as '&' String - to allow an 'and' search via tsquery 142 * 143 * @return String containing scope keywords separated by '&'s 168 144 */ 169 145 public String getScopesAsString() 170 146 { 171 String scopesStr = "";147 StringBuilder scopesStr = new StringBuilder(); 172 148 Iterator<String> it = this.scopes.iterator(); 173 149 while ( it.hasNext() ) 174 150 { 175 scopesStr += it.next(); 176 } 177 System.out.println("got: " + scopesStr); 178 System.out.println("alternatively: " + this.scopes.toString()); 179 return scopesStr; 151 if (scopesStr.length() > 0) 152 scopesStr.append(" & "); 153 154 scopesStr.append(it.next()); 155 } 156 return scopesStr.toString(); 180 157 } 181 158 … … 324 301 * Executes the search and returns a SearchSummary object 325 302 * @return SearchSummary object containing result of search 326 */ 327 public SearchSummary doSearch() 303 * @throws DiscoveryDBException 304 */ 305 public SearchSummary doSearch() throws DiscoveryDBException 328 306 { 329 307 330 308 SearchSummary result = null; 331 309 332 try 333 { 334 // firstly prepare the search query string 335 String sqlQuery = constructSearchQuery(); 310 // firstly prepare the search query string 311 String sqlQuery = constructSearchQuery(); 312 313 // now run the query using the db client configured appropriately 314 InterfaceDBClient client = DBProperties.setupDBClient(); 315 String[][] results = client.runQuery(sqlQuery); 316 client.closeConnection(); 317 318 // now extract the results data 319 result = processResults(results); 336 320 337 // now run the query using the db client configured appropriately338 InterfaceDBClient client = setupDBClient();339 String[][] results = client.runQuery(sqlQuery);340 341 // now extract the results data342 result = processResults(results);343 344 }345 catch (Exception e)346 {347 result.setStatus(false);348 result.setStatusMessage("An error ocurred - details : " + e.toString());349 }350 351 321 return result; 352 322 } … … 364 334 logger.info("Extracting results"); 365 335 SearchSummary result = new SearchSummary(); 366 result.setHits(results.length); // NB ensure this is 0 if not data returned 367 368 if ( result.getHits() > 0) 336 result.setHits(0); // NB ensure this is 0 if not data returned 337 ArrayList<String> docs = new ArrayList<String>(); 338 339 if (results.length > 0) 369 340 { 370 341 Vector<Hashtable<String, String>> documents = new Vector<Hashtable<String, String>>(); … … 372 343 { 373 344 Hashtable<String, String> thisDoc = new Hashtable<String, String>(); 374 thisDoc.put("name", results[i][0]); 375 376 // TODO: these next two are not used anywhere - remove or use? 377 thisDoc.put("matches", String.valueOf(1)); 378 thisDoc.put("position", String.valueOf(i)); 379 documents.add( thisDoc ); 345 // avoid duplicate results - NB, this could be done using the DISTINCT SQL keyword but 346 // this would require the 'order by' clause to change so would necessitate a much more 347 // complex sql query construction 348 if (! docs.contains(results[i][0])) 349 { 350 logger.info("Found matching document - adding this to results"); 351 docs.add(results[i][0]); 352 thisDoc.put("name", results[i][0]); 353 354 // TODO: these next two are not used anywhere - remove or use? 355 thisDoc.put("matches", String.valueOf(1)); 356 thisDoc.put("position", String.valueOf(i)); 357 documents.add( thisDoc ); 358 } 380 359 } 381 360 result.setStatus( true ); 382 361 result.setStatusMessage( "Success" ); 383 362 result.setDocuments( documents ); 363 result.setHits(docs.size()); // NB ensure this is 0 if not data returned 384 364 logger.info("Results added to search summary"); 385 365 } … … 396 376 397 377 /** 398 * Set up a DBClient object to use with running the search - getting the config details399 * from the properties file400 *401 * @return DBClient ready to accept SQL queries to run402 * @throws DiscoveryDBException403 */404 private InterfaceDBClient setupDBClient() throws DiscoveryDBException405 {406 logger.info("Setting up DBClient using config info from properties file...");407 String connectionString = properties.getProperty("jdbc.uri");408 String userName = properties.getProperty("jdbc.username");409 String pw = properties.getProperty("jdbc.password");410 411 InterfaceDBClient client = new PostgresDBClient(connectionString, userName, pw);412 logger.info("DBClient set up for use with Discovery service.");413 return client;414 }415 416 /**417 378 * Set up a SQL Select query with the input search parameters 418 379 * … … 425 386 // NB, we use the postgres text search function to do term searches 426 387 StringBuffer fromSqlCmd = new StringBuffer("SELECT original_document_filename FROM " + 427 ORIGINAL_DOCUMENT_TABLE + " ");388 DBProperties.ORIGINAL_DOCUMENT_TABLE + " "); 428 389 StringBuffer whereSqlCmd = new StringBuffer(); 429 390 391 String termColumn = null; 430 392 if (Utilities.isStringDefined(this.getTerm())) 431 393 { 394 termColumn = "document_ts_vector"; // default column to use - i.e. a full text search 395 // NB, if no term type provided, assume full text search 432 396 if (Utilities.isStringDefined(this.getTermType())) 397 { 433 398 logger.info("Adjusting query for " + this.getTermType() + " type search"); 434 435 // NB, if no term type provided, assume full text search 436 if (! Utilities.isStringDefined(this.getTermType()) || 437 this.getTermType().equals(DiscoveryServiceSkeleton.FULL_TEXT_TERM_TYPE)) 438 { 439 appendWhereClause(whereSqlCmd, " to_tsquery('" + this.term + "') @@ ts_vector "); 440 } 441 else if (this.getTermType().equals(DiscoveryServiceSkeleton.AUTHOR_TERM_TYPE)) 442 { 443 //TODO: IMPLEMENT datamodel side... 444 appendWhereClause(whereSqlCmd, " AUTHOR LIKE %" + this.term + "% "); 445 } 446 else if (this.getTermType().equals(DiscoveryServiceSkeleton.PARAMETER_TERM_TYPE)) 447 { 448 //TODO: IMPLEMENT datamodel side... 449 appendWhereClause(whereSqlCmd, " PARAMETERS LIKE %" + this.term + "% "); 450 } 451 452 } 453 454 // Substitute values in template xquery string 455 /* xqueryStr = xqueryStr.replaceFirst("__subst_term__", this.term); 456 xqueryStr = xqueryStr.replaceFirst("__subst_scope__", this.getScopesAsString() ); 457 458 459 */ 399 400 if (this.getTermType().equals(DiscoveryServiceSkeleton.AUTHOR_TERM_TYPE)) 401 { 402 termColumn = "authors_ts_vector "; 403 } 404 else if (this.getTermType().equals(DiscoveryServiceSkeleton.PARAMETER_TERM_TYPE)) 405 { 406 termColumn = "parameters_ts_vector"; 407 } 408 } 409 fromSqlCmd.append(", to_tsquery('" + this.term + "') AS query "); 410 appendWhereClause(whereSqlCmd, "query @@ COALESCE(" + termColumn + ", '') "); 411 } 412 413 if (this.getScopes() != null && this.getScopes().size() > 0) 414 { 415 logger.info("Adding scopes data to query"); 416 appendWhereClause(whereSqlCmd, " to_tsquery('" + this.getScopesAsString() + "') @@ scope_ts_vector "); 417 } 418 460 419 // Construct the search clause for spatio temporal search, if necessary 461 420 if (isSpatialSearch()) … … 464 423 // set up the bounding box geometry for the search 465 424 String bbox = "SetSRID('BOX3D(" + this.getLimitWest().floatValue() + " " + this.getLimitSouth().floatValue() + 466 ", " + this.getLimitEast().floatValue() + " " + this.getLimitNorth().floatValue() + ")â::box3d,-1)"; 467 425 ", " + this.getLimitEast().floatValue() + " " + this.getLimitNorth().floatValue() + ")'::box3d, 4326)"; 426 427 fromSqlCmd.append(", " + DBProperties.SPATIAL_DATA_TABLE); 428 String spatialPart = DBProperties.SPATIAL_DATA_TABLE + ".geometry "; 468 429 if (spatialOperator.equals(DiscoveryServiceSkeleton.OVERLAPS_OPERATOR_TYPE)) 469 430 { 470 appendWhereClause(whereSqlCmd, " geometry && " + bbox + " ");431 spatialPart += "&& " + bbox + " "; 471 432 } 472 433 else if ( spatialOperator.equals(DiscoveryServiceSkeleton.WITHIN_OPERATOR_TYPE) ) 473 434 { 474 appendWhereClause(whereSqlCmd, " geometry @ " + bbox + " ");435 spatialPart +="@ " + bbox + " "; 475 436 } 476 437 else if ( spatialOperator.equals("doesNotOverlap") ) 477 438 { 478 appendWhereClause(whereSqlCmd, "NOT geometry && " + bbox + " "); 479 } 439 spatialPart = "NOT " + spatialPart + "&& " + bbox + " "; 440 } 441 appendWhereClause(whereSqlCmd, spatialPart); 480 442 } 481 443 … … 483 445 { 484 446 logger.info("Adding temporal data to query"); 485 fromSqlCmd.append(", " + TEMPORAL_DATA_TABLE + ", " + SPATIAL_TEMPORAL_DATA_TABLE + "");486 appendWhereClause(whereSqlCmd, " " +TEMPORAL_DATA_TABLE + ".start_time, " +487 TEMPORAL_DATA_TABLE + ".end_time) OVERLAPS (DATE '" +447 fromSqlCmd.append(", " + DBProperties.TEMPORAL_DATA_TABLE); 448 appendWhereClause(whereSqlCmd, " (" + DBProperties.TEMPORAL_DATA_TABLE + ".start_time, " + 449 DBProperties.TEMPORAL_DATA_TABLE + ".end_time) OVERLAPS (DATE '" + 488 450 this.sdf.format(this.getDateRangeStart().getTime()) + 489 "', DATE '" + this.sdf.format(this.getDateRangeEnd().getTime() ) + "' AND " +490 TEMPORAL_DATA_TABLE + ".temporal_data_id == " +491 SPATIAL_TEMPORAL_DATA_TABLE + ".temporal_data_id");451 "', DATE '" + this.sdf.format(this.getDateRangeEnd().getTime() ) + "') AND " + 452 DBProperties.TEMPORAL_DATA_TABLE + ".temporal_data_id = " + 453 DBProperties.SPATIAL_TEMPORAL_DATA_TABLE + ".temporal_data_id"); 492 454 } 493 455 … … 496 458 if (isTemporalSearch() || isSpatialSearch()) 497 459 { 498 whereSqlCmd.append(" AND " + SPATIAL_TEMPORAL_DATA_TABLE + ".original_document_id == " + 499 ORIGINAL_DOCUMENT_TABLE + ".original_document_id"); 500 } 501 502 // add order by, and direction, if required 503 if (this.getOrderByField() != null) 504 { 505 logger.info("Adding ordering info to query"); 506 // orderByField contains $ chars etc so needs special treatment to 507 // avoid "java.lang.IllegalArgumentException: Illegal group reference" when replacing 508 // TODO: is this true now? 509 String orderByFieldReplace = Matcher.quoteReplacement(this.getOrderByField()); 510 whereSqlCmd.append(ORDERBY_STATEMENT + orderByFieldReplace + " " + this.getOrderByDirection()); 460 fromSqlCmd.append(", " + DBProperties.SPATIAL_TEMPORAL_DATA_TABLE); 461 whereSqlCmd.append(" AND " + DBProperties.SPATIAL_TEMPORAL_DATA_TABLE + ".original_document_id = " + 462 DBProperties.ORIGINAL_DOCUMENT_TABLE + ".original_document_id"); 511 463 } 512 464 513 465 // add the WHERE statement, if required 514 466 if (whereSqlCmd.length() > 0) 515 whereSqlCmd.insert(0, WHERE_STATEMENT); 467 whereSqlCmd.insert(0, DBProperties.WHERE_STATEMENT); 468 469 // add order by, and direction, if required 470 logger.info("Adding ordering info to query"); 471 if (this.getOrderByField() != null) 472 { 473 logger.info("- adding specified order by parameter"); 474 whereSqlCmd.append(DBProperties.ORDERBY_STATEMENT + this.getOrderByField() + " " + this.getOrderByDirection()); 475 } 476 else if (termColumn != null) 477 { 478 // use natural ordering of results according to statistics worked out by the text search facility, if appropriate 479 logger.info("- order according to rank of search result"); 480 whereSqlCmd.append(DBProperties.ORDERBY_STATEMENT + " ts_rank(" + termColumn + ", query) " + this.getOrderByDirection()); 481 } 516 482 517 483 // lastly, add the size limit and offset of the returned data set 518 484 logger.info("Adding size limit and offset of results set"); 519 whereSqlCmd.append( OFFSET_STATEMENT + this.getStart());520 whereSqlCmd.append( LIMIT_STATEMENT + this.getHowMany());485 whereSqlCmd.append(DBProperties.OFFSET_STATEMENT + this.getStart()); 486 whereSqlCmd.append(DBProperties.LIMIT_STATEMENT + this.getHowMany()); 521 487 522 488 String fullCmd = fromSqlCmd.toString() + whereSqlCmd.toString() + ";"; … … 580 546 this.getLimitNorth() != null && this.getLimitSouth() != null) 581 547 { 582 if (this.getLimitEast().intValue() != 180 && this.getLimitWest().intValue() != -180 && 583 this.getLimitNorth().intValue() != 90 && this.getLimitSouth().intValue() != -90) 584 { 585 logger.info("Valid spatial data specified"); 586 return true; 587 } 588 logger.info("Spatial data is equivalent to global search - will ignore in search"); 548 if (this.getLimitEast().intValue() == 180 && this.getLimitWest().intValue() == -180 && 549 this.getLimitNorth().intValue() == 90 && this.getLimitSouth().intValue() == -90) 550 { 551 logger.info("Spatial data is equivalent to global search - will ignore in search"); 552 return false; 553 } 554 logger.info("Valid spatial data specified"); 555 return true; 589 556 } 590 557 logger.info("No valid spatial data specified"); 591 558 return false; 592 559 } 593 594 595 static String xqueryStrTemplate =596 "xquery version \"1.0\"; \n"+597 "import module namespace voclib=\"http://ndg.nerc.ac.uk/xquery/lib/vocab\" at \"xmldb:exist:///db/xqueryLib/Vocabs/vocab_xquery_lib.xquery\"; \n"+598 "declare namespace sql=\"http://exist-db.org/xquery/sql\"; \n"+599 "declare namespace moles=\"http://ndg.nerc.ac.uk/moles\"; \n"+600 "declare function local:getScope() as xs:string { \n"+601 " \"__subst_scope__\" \n"+602 "}; \n"+603 "declare function local:getSpatioTempWhereClause() as xs:string { \n"+604 " \"__subst_spatioTempWhereClause__\" \n"+605 "}; \n"+606 "declare function local:doSpatioTemp() as xs:boolean { \n"+607 " __subst_doSpatioTemp__ \n"+608 "}; \n"+609 "declare function local:doTerm() as xs:boolean { \n"+610 " __subst_doTerm__ \n"+611 "}; \n"+612 "declare function local:doOrderBy() as xs:boolean { \n"+613 " __subst_doOrderBy__ \n"+614 "}; \n"+615 616 "declare function local:orderBy($arg1 as item()* )as item()* { \n"+617 " if ( local:doOrderBy() ) then ( \n"+618 " for $i in $arg1 \n"+619 " let $coll := '/db/discovery/moles' \n"+620 " let $thisdoc := $i \n"+621 " let $name := string-join( ($coll , $thisdoc), '/' ) \n"+622 " let $field := __subst_orderByField__ \n"+623 " order by $field __subst_orderByDirection__ \n"+624 " return $i \n"+625 " ) \n"+626 " else $arg1 \n"+627 "}; \n"+628 629 "declare function local:doScope() as xs:boolean { \n"+630 " __subst_doScope__ \n"+631 "}; \n"+632 "declare function local:doSpatioTempQuery() as item()* { \n"+633 " let $connection := sql:get-connection(\"org.postgresql.Driver\", \""+properties.getProperty("jdbc.uri")+"\", \""+properties.getProperty("jdbc.username")+"\", \""+properties.getProperty("jdbc.password")+"\") \n"+634 " let $result := sql:execute($connection, fn:string-join( (\"select * from spatiotemp \", local:getSpatioTempWhereClause(),\";\"), \"\"), fn:true()) \n"+635 " let $s_result := ( \n"+636 " for $j in $result/sql:row/id/text() \n"+637 " return $j \n"+638 " ) \n"+639 " return $s_result \n"+640 "}; \n"+641 "declare function local:doTermQuery() as item()* { \n"+642 " (: Get result of full-text search :) \n"+643 " let $result := __subst_term__ \n"+644 " for $i in $result \n"+645 " return util:document-name($i) \n"+646 "}; \n"+647 "declare function local:mergeResults($arg1 as item()*, $arg2 as item()*) as item()* { \n"+648 " for $i in $arg1[.=$arg2] \n"+649 " return $i \n"+650 "}; \n"+651 "declare function local:filterByScope($arg1 as item()*) as item()* { \n"+652 " if ( local:doScope() ) then ( \n"+653 " for $i in $arg1 \n"+654 " let $coll := '/db/discovery/moles' \n"+655 " let $thisdoc := $i \n"+656 " let $name := string-join( ($coll , $thisdoc), '/' ) \n"+657 " let $molesdoc := document( $name ) \n"+658 " where document($name)/moles:dgMetadata/moles:dgMetadataRecord/moles:dgStructuredKeyword[moles:dgValidTerm &= local:getScope() and voclib:spot-vocab($voclib:ndg_data_provider_vocab, moles:dgValidTermID/moles:ParentListID)] \n"+659 " return $i \n"+660 " ) \n"+661 " else $arg1 \n"+662 "}; \n"+663 664 "declare function local:buildResult() as item()* { \n"+665 " \n"+666 " let $termResult := ( \n"+667 " if ( local:doTerm() ) then local:doTermQuery() \n"+668 " else () \n"+669 " ) \n"+670 " let $spatioResult := ( \n"+671 " if ( local:doSpatioTemp() ) then local:doSpatioTempQuery() \n"+672 " else () \n"+673 " ) \n"+674 " (: populate $seq with appropriate combination of result, depending on search options :) \n"+675 " let $seq := ( \n"+676 677 " if ( local:doTerm() ) then ( \n"+678 " if ( local:doSpatioTemp() ) then ( \n"+679 " local:mergeResults($termResult, $spatioResult) \n"+680 " ) \n"+681 " else ( \n"+682 " $termResult \n"+683 " ) \n"+684 " ) \n"+685 " else ( \n"+686 " if ( local:doSpatioTemp() ) then ( \n"+687 " $spatioResult \n"+688 " ) \n"+689 " else ( \n"+690 " () \n"+691 " ) \n"+692 " ) \n"+693 " ) \n"+694 " for $i in local:orderBy( local:filterByScope( $seq ) ) \n"+695 " return $i \n"+696 "}; \n"+697 "for $i in local:buildResult() \n"+698 "return <document>{$i}</document> \n";699 700 701 702 560 }
Note: See TracChangeset
for help on using the changeset viewer.