Start line:  
End line:  

Snippet Preview

Snippet HTML Code

Stack Overflow Questions
  /*
   * Licensed to the Apache Software Foundation (ASF) under one or more
   * contributor license agreements.  See the NOTICE file distributed with
   * this work for additional information regarding copyright ownership.
   * The ASF licenses this file to You under the Apache License, Version 2.0
   * (the "License"); you may not use this file except in compliance with
   * the License.  You may obtain a copy of the License at
   *
   *     http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
 package org.apache.stanbol.enhancer.engines.dbpspotlight.spot;
 
 import static org.apache.stanbol.enhancer.engines.dbpspotlight.Constants.PARAM_SPOTTER;
 import static org.apache.stanbol.enhancer.engines.dbpspotlight.Constants.PARAM_URL_KEY;
 import static org.apache.stanbol.enhancer.engines.dbpspotlight.Constants.UTF8;
 import static org.apache.stanbol.enhancer.engines.dbpspotlight.utils.XMLParser.loadXMLFromInputStream;
 import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.DC_RELATION;
 
 import java.net.URL;
 import java.util.Map;
 
DBPSpotlightSpotEnhancementEngine provides functionality to enhance documents with the surface forms spotted by the DBpedia Spotlight spot service.

Author(s):
Iavor Jelev, Babelmonkeys (GzEvD)
 
 @Component(metatype = true, immediate = true
 	label = "%stanbol.DBPSpotlightSpotEnhancementEngine.name"
 	description = "%stanbol.DBPSpotlightSpotEnhancementEngine.description")
 @Properties(value = { 
 		@Property(name = ., value = "dbpspotlightspot"),
 		@Property(name = , value = "http://spotlight.dbpedia.org/rest/spot"),
 })
 public class DBPSpotlightSpotEnhancementEngine extends
Ensures this engine is deactivated in org.apache.stanbol.commons.stanboltools.offline.OfflineMode
 
 	// Never read in the visible code, hence the "unused" suppression.
 	// NOTE(review): presumably an injected service reference whose mere
 	// presence gates activation of this component - the binding annotation
 	// is not visible in this copy; confirm against the original source.
 	@SuppressWarnings("unused")
 	private OnlineMode onlineMode;

The default value for the execution order of this engine. Currently set to org.apache.stanbol.enhancer.servicesapi.ServiceProperties.ORDERING_CONTENT_EXTRACTION - 29.
 
 	// Default ordering value of this engine.
 	// NOTE(review): the left operand of "- 29" is missing in this copy, so
 	// the expression as written evaluates to plain -29; restore the base
 	// ordering constant from the original source.
 	public static final Integer defaultOrder =  - 29;


holds the logger.
	private static final Logger log = LoggerFactory
holds the url of the Spotlight REST endpoint
	// URL of the remote service this engine sends its requests to.
	// NOTE(review): presumably populated in activate(..) and in the
	// parameterised constructor - the assignment targets are not visible in
	// this copy; confirm.
	private URL spotlightUrl;
holds the chosen spotter to be used
    // Timeout used for the HTTP connection to the service.
    // NOTE(review): doPostRequest multiplies a value by 1000 before passing it
    // to setConnectTimeout/setReadTimeout (which take milliseconds), so this
    // is presumably expressed in seconds, with values <= 0 meaning
    // "not configured" - confirm.
    private int connectionTimeout;

Default constructor used by OSGI
	// Constructor that receives the service URL, the spotter and the
	// connection timeout directly and stores each of them on this instance.
	// NOTE(review): the commas between the parameters and the field names
	// after "this." are missing in this copy of the file; restore them from
	// the original source before compiling.
	protected DBPSpotlightSpotEnhancementEngine(URL spotlightUrlString spotlightSpotterint connectionTimeout){
		this. = spotlightUrl;
		this. = spotlightSpotter;
		this. = connectionTimeout;
	}

Initialize all parameters from the configuration panel, or with their default values

	/**
	 * Activates the component: delegates to the super class and then reads
	 * the engine configuration from the properties of the component context.
	 * The service URL and the connection timeout are parsed by
	 * SpotlightEngineUtils; the spotter setting is read directly from the
	 * properties.
	 *
	 * NOTE(review): the assignment targets, the property key passed to
	 * properties.get(..) and the end of the throws clause are missing in
	 * this copy of the file.
	 */
	@SuppressWarnings("unchecked")
	protected void activate(ComponentContext cethrows ConfigurationException,
		super.activate(ce);
		// raw Dictionary returned by the component context (the reason for
		// the "unchecked" suppression above)
		Dictionary<StringObjectproperties = ce.getProperties();
		 = SpotlightEngineUtils.parseSpotlightServiceURL(properties);
         = SpotlightEngineUtils.getConnectionTimeout(properties);
		//also set the spotter to null if an empty string is parsed
		Object spotterConfig = properties.get();
		// a missing value and an empty string are both normalised to null
		 = spotterConfig != null && !spotterConfig.toString().isEmpty() ?
				spotterConfig.toString() : null;
	}

Check if the content can be enhanced

	/**
	 * Reports whether this engine can process the given content item. The
	 * decision itself is delegated to SpotlightEngineUtils.canProcess(ci).
	 *
	 * NOTE(review): the two result values of the conditional expression are
	 * missing in this copy of the file, so the concrete return codes cannot
	 * be documented from here.
	 */
	public int canEnhance(ContentItem cithrows EngineException {
		return SpotlightEngineUtils.canProcess(ci) ?
	}

Calculate the enhancements by doing a POST request to the DBpedia Spotlight endpoint and processing the results

	/**
	 * Computes the enhancements for the given content item: reads the
	 * language and the plain text of the item via SpotlightEngineUtils,
	 * sends the text to the remote service through doPostRequest(..) and, if
	 * a (non-null) result is returned, hands it to createEnhancements(..).
	 * Afterwards the metadata of the content item is serialised as
	 * "application/rdf+xml" and written to the debug log.
	 *
	 * NOTE(review): several statements are missing in this copy - the lock
	 * acquisition/release announced by the comment below, the declaration of
	 * debugStream, the guard around the debug serialisation and the catch
	 * clause of the inner try.
	 */
	public void computeEnhancements(ContentItem cithrows EngineException {
		Language language = SpotlightEngineUtils.getContentLanguage(ci);
		String text = SpotlightEngineUtils.getPlainContent(ci);
		// the content item URI is passed along for use in error messages
		Collection<SurfaceFormdbpslGraph = doPostRequest(text,ci.getUri());
		if (dbpslGraph != null) {
			// Acquire a write lock on the ContentItem when adding the
			// enhancements
			try {
				createEnhancements(dbpslGraphci,text,language);
					// dump the resulting metadata as RDF/XML for debugging
					Serializer serializer = Serializer.getInstance();
					serializer.serialize(debugStreamci.getMetadata(),
							"application/rdf+xml");
					try {
						.debug("DBpedia Spotlight Spot Enhancements:\n{}",
								debugStream.toString("UTF-8"));
					}
				}
finally {
			}
		}
	}

The method adds the returned DBpedia Spotlight surface forms to the content item's metadata. For each one a TextAnnotation is created.

Parameters:
occs a Collection of entity information
ci the content item
	/**
	 * Creates one text enhancement per surface form via
	 * SpotlightEngineUtils.createTextEnhancement(..) and keeps track of the
	 * first annotation created for every surface form name. When a name is
	 * encountered again, a triple connecting the annotation remembered for
	 * that name with the newly created one is added to the metadata graph of
	 * the content item.
	 *
	 * NOTE(review): the predicate of that triple is missing in this copy of
	 * the file (presumably DC_RELATION, which the file imports statically -
	 * confirm), as are the commas in the parameter and argument lists.
	 */
	protected void createEnhancements(Collection<SurfaceFormoccs,
			ContentItem ci,  String contentLanguage lang) {
		// surface form name -> first text annotation created for that name
		HashMap<StringUriRefentityAnnotationMap = new HashMap<StringUriRef>();
		MGraph model = ci.getMetadata();
		for (SurfaceForm occ : occs) {
			UriRef textAnnotation = SpotlightEngineUtils.createTextEnhancement(
					occthiscicontentlang);
			if (entityAnnotationMap.containsKey(occ.name)) {
				// repeated name: link the remembered annotation to this one
				model.add(new TripleImpl(entityAnnotationMap.get(occ.name),
						textAnnotation));
else {
				// first occurrence of this name: remember its annotation
				entityAnnotationMap.put(occ.nametextAnnotation);
			}
		}
	}


Sends a POST request to the DBpediaSpotlight url.

Parameters:
text a String with the text to be analyzed
contentItemUri the URI of the ContentItem (only used for logging)
Returns:
a Collection of the SurfaceForms parsed from the XML response of the server
Throws:
org.apache.stanbol.enhancer.servicesapi.EngineException if the request cannot be sent
	/**
	 * Sends the text as a form-encoded HTTP POST request, parses the XML
	 * response into a Document and returns the surface forms extracted from
	 * it by SurfaceForm.parseSurfaceForm(..).
	 *
	 * The request body consists of an optional "spotter=" parameter followed
	 * by the URL-encoded "text=" parameter. I/O failures while opening the
	 * connection, writing the request or reading the response, as well as
	 * XML parse errors, are reported as EngineException.
	 *
	 * NOTE(review): this copy of the file is missing several tokens - the
	 * statements that assign "connection" and "wr", the identifiers used for
	 * the timeout, the spotter and the charset, the closing braces before
	 * the catch/finally clauses and the tails of the exception messages.
	 */
	protected Collection<SurfaceFormdoPostRequest(String text,UriRef contentItemUri)
			throws EngineException {
		//rwesten: reimplemented this so that the request
		//         is directly written to the request instead
		//         of storing the data in an in-memory StringBuilder
		// NOTE(review): no assignment to "connection" is visible before its
		// first use below; the statement opening the connection appears to
		// have been lost.
		HttpURLConnection connection = null;
		BufferedWriter wr = null;
		try {
			connection.setRequestMethod("POST");
			connection.setRequestProperty("Content-Type",
					"application/x-www-form-urlencoded");
			connection.setRequestProperty("Accept""text/xml");
            //set ConnectionTimeout (if configured)
            // the same value (times 1000) is used for both the connect and
            // the read timeout, which HttpURLConnection expects in milliseconds
            if( > 0){
                connection.setConnectTimeout(*1000);
                connection.setReadTimeout(*1000);
            }
            connection.setUseCaches(false);
			connection.setDoInput(true);
			connection.setDoOutput(true);
			// Send request
			// NOTE(review): the start of the statement creating the writer on
			// top of the connection's output stream is missing here
					connection.getOutputStream(),));
catch (IOException e) {
			IOUtils.closeQuietly(wr);
			throw new EngineException("Unable to open connection to "+
		}
		try {
			// the spotter parameter is only sent when one is set and non-empty
			if ( != null && !.isEmpty()) {
				wr.write("spotter=");
				wr.write(URLEncoder.encode(.name()));
				wr.write('&');
			}
			wr.write("text=");
			//now append the URL encoded text
			//TODO: This will load the URLEncoded variant in-memory.
			//      One could avoid that by encoding the data in smaller
			//      pieces, but using URLEncoding for big data is anyway
			//      very inefficient. So instead of fixing this issue here
			//      DBpedia Spotlight should support "multipart/form-data"
			//      instead.
			//      As soon as this is supported this should be re-implemented
			//      to support streaming.
			wr.write(URLEncoder.encode(text.name()));
					"The platform does not support encoding " + .name(),e);
catch (IOException e) {
			throw new EngineException("Unable to write 'plain/text' content "
"for ContentItem "+contentItemUri+" to "
finally {
			// the writer is always closed once the request body is written
			IOUtils.closeQuietly(wr);
		}
		// rwesten: reimplemented this to read the XML
		// Document directly from the response
		InputStream is = null;
		Document xmlDoc;
		try {
			// Get Response
			 is = connection.getInputStream();
			xmlDoc = loadXMLFromInputStream(is);
catch (IOException e) {
			throw new EngineException("Unable to spot Entities with"
"Dbpedia Spotlight Spot RESTful Serice running at "
catch(SAXException e) {
			throw new EngineException("Unable to parse Response from "
"Dbpedia Spotlight Spot RESTful Serice running at "
finally {
			// the response stream is always closed, also on failure
			IOUtils.closeQuietly(is);
		}
		//rwesten: commented the disconnect to allow keep-alive
		//connection.disconnect();
	    return SurfaceForm.parseSurfaceForm(xmlDoc);
	}
		return Collections.unmodifiableMap(Collections.singletonMap(
	}
New to GrepCode? Check out our FAQ X