Start line:  
End line:  

Snippet Preview

Snippet HTML Code

Stack Overflow Questions
  /*
   * Licensed to the Apache Software Foundation (ASF) under one or more
   * contributor license agreements.  See the NOTICE file distributed with
   * this work for additional information regarding copyright ownership.
   * The ASF licenses this file to You under the Apache License, Version 2.0
   * (the "License"); you may not use this file except in compliance with
   * the License.  You may obtain a copy of the License at
   *
   *     http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
 package org.apache.stanbol.enhancer.engines.dbpspotlight.candidates;
 
 import static org.apache.stanbol.enhancer.engines.dbpspotlight.Constants.PARAM_CONFIDENCE;
 import static org.apache.stanbol.enhancer.engines.dbpspotlight.Constants.PARAM_DISAMBIGUATOR;
 import static org.apache.stanbol.enhancer.engines.dbpspotlight.Constants.PARAM_RESTRICTION;
 import static org.apache.stanbol.enhancer.engines.dbpspotlight.Constants.PARAM_SPARQL;
 import static org.apache.stanbol.enhancer.engines.dbpspotlight.Constants.PARAM_SPOTTER;
 import static org.apache.stanbol.enhancer.engines.dbpspotlight.Constants.PARAM_SUPPORT;
 import static org.apache.stanbol.enhancer.engines.dbpspotlight.Constants.PARAM_URL_KEY;
 import static org.apache.stanbol.enhancer.engines.dbpspotlight.Constants.UTF8;
 import static org.apache.stanbol.enhancer.engines.dbpspotlight.utils.SpotlightEngineUtils.getConnectionTimeout;
 import static org.apache.stanbol.enhancer.engines.dbpspotlight.utils.XMLParser.loadXMLFromInputStream;
 import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.DC_RELATION;
 
 import java.net.URL;
 import java.util.Map;
 
DBPSpotlightCandidatesEnhancementEngine provides functionality to enhance documents with candidate entities obtained from the DBpedia Spotlight candidates REST service.

Author(s):
Iavor Jelev, Babelmonkeys (GzEvD)
 
 @Component(metatype = true, immediate = true, label = "%stanbol.DBPSpotlightCandidatesEnhancementEngine.name", description = "%stanbol.DBPSpotlightCandidatesEnhancementEngine.description")
 @Properties(value = { 
 		@Property(name = ., value = "dbpspotlightcandidates"),
 		@Property(name = , value = "http://spotlight.dbpedia.org/rest/candidates"),
 		@Property(name = ),
 		@Property(name = ),
 		@Property(name = ),
 })
Ensures this engine is deactivated in org.apache.stanbol.commons.stanboltools.offline.OfflineMode
 
The default value for the Execution of this Engine. Currently set to org.apache.stanbol.enhancer.servicesapi.ServiceProperties.ORDERING_CONTENT_EXTRACTION -35
	public static final Integer defaultOrder =  - 35;


This contains the logger.
	private static final Logger log = LoggerFactory
holds the url of the Spotlight REST endpoint
	private URL spotlightUrl;
holds the chosen spotter to be used
holds the chosen disambiguator to be used
holds the type restriction for the results, if the user wishes one
holds the chosen minimal support value
holds the chosen minimal confidence value
holds the sparql restriction for the results, if the user wishes one
    private int connectionTimeout;

Used by OSGI to instantiate the engine. Expects activate(org.osgi.service.component.ComponentContext) to be called before usage
Used by unit tests

Parameters:
spotlightUrl
	/**
	 * Creates an engine that is configured directly instead of through
	 * {@code activate(ComponentContext)}. Used by unit tests.
	 *
	 * @param spotlightUrl the URL of the Spotlight REST endpoint
	 * @param connectionTimeout the connection timeout; presumably in seconds,
	 *        since the request code appears to multiply a timeout by 1000
	 *        before passing it to the connection (TODO confirm)
	 */
	protected DBPSpotlightCandidatesEnhancementEngine(URL spotlightUrl,int connectionTimeout){
		this.spotlightUrl = spotlightUrl;
		this.connectionTimeout = connectionTimeout;
	}

Initialize all parameters from the configuration panel, or with their default values

	@SuppressWarnings("unchecked")
	protected void activate(ComponentContext cethrows ConfigurationException,
		super.activate(ce);
		// TODO initialize Extractor
		Dictionary<StringObjectproperties = ce.getProperties();
		//parse the URL of the RESTful service
		 = SpotlightEngineUtils.parseSpotlightServiceURL(properties);
         = SpotlightEngineUtils.getConnectionTimeout(properties);
		 = properties.get() == null ? null
				: (Stringproperties.get();
		 = properties.get() == null ? null
				: (Stringproperties.get();
		 = properties.get() == null ? null
				: (Stringproperties.get();
		 = properties.get() == null ? null
				: (Stringproperties.get();
		 = properties.get() == null ? null
				: (Stringproperties.get();
		 = properties.get() == null ? null
				: (Stringproperties.get();
	}

Check if the content can be enhanced

	public int canEnhance(ContentItem cithrows EngineException {
		return SpotlightEngineUtils.canProcess(ci) ?
	}

Calculate the enhancements by doing a POST request to the DBpedia Spotlight endpoint and processing the results

	public void computeEnhancements(ContentItem cithrows EngineException {
		Language language = SpotlightEngineUtils.getContentLanguage(ci);
		String text = SpotlightEngineUtils.getPlainContent(ci);
		Collection<SurfaceFormdbpslGraph = doPostRequest(text,ci.getUri());
		if (dbpslGraph != null) {
			// Acquire a write lock on the ContentItem when adding the
			// enhancements
			try {
				createEnhancements(dbpslGraphci,text,language);
					Serializer serializer = Serializer.getInstance();
					serializer.serialize(debugStreamci.getMetadata(),
							"application/rdf+xml");
					try {
						.debug("DBpedia Spotlight Spot Enhancements:\n{}",
								debugStream.toString("UTF-8"));
					}
				}
finally {
			}
		}
	}

This generates enhancement structures for the entities from DBpedia Spotlight and adds them to the content item's metadata. For each surface form a TextAnnotation and the corresponding EntityAnnotations are created.

Parameters:
occs a Collection of entity information
ci the content item
	protected void createEnhancements(Collection<SurfaceFormoccs,
			ContentItem ciString textLanguage language) {
		// TODO create TextEnhancement (form, start, end, type?)
		HashMap<StringUriRefentityAnnotationMap = new HashMap<StringUriRef>();
		MGraph model = ci.getMetadata();
		for (SurfaceForm occ : occs) {
			UriRef textAnnotation = SpotlightEngineUtils.createTextEnhancement(
					occthiscitextlanguage);
			Iterator<CandidateResourceresources = occ.resources.iterator();
			while (resources.hasNext()) {
				CandidateResource resource = resources.next();
				UriRef entityAnnotation = SpotlightEngineUtils.createEntityAnnotation(
						resourcethiscitextAnnotation);
				entityAnnotationMap.put(resource.urientityAnnotation);
			}
			if (entityAnnotationMap.containsKey(occ.name)) {
				model.add(new TripleImpl(entityAnnotationMap.get(occ.name),
						textAnnotation));
else {
				entityAnnotationMap.put(occ.nametextAnnotation);
			}
		}
	}

Sends a POST request to the DBpedia Spotlight URL.

Parameters:
text a String with the text to be analyzed
contentItemUri Just used for logging
Returns:
a Collection of SurfaceForm objects parsed from the XML response of the server
Throws:
org.apache.stanbol.enhancer.servicesapi.EngineException if the request cannot be sent, or if the response cannot be read or parsed
	protected Collection<SurfaceFormdoPostRequest(String text,UriRef contentItemUri)
			throws EngineException {
		HttpURLConnection connection = null;
		BufferedWriter wr = null;
		try {
			connection.setRequestMethod("POST");
			connection.setRequestProperty("Content-Type",
					"application/x-www-form-urlencoded");
			connection.setRequestProperty("Accept""text/xml");
            //set ConnectionTimeout (if configured)
            if( > 0){
                connection.setConnectTimeout(*1000);
                connection.setReadTimeout(*1000);
            }
            connection.setUseCaches(false);
			connection.setDoInput(true);
			connection.setDoOutput(true);
			// Send request
					connection.getOutputStream(),));
catch (IOException e) {
			IOUtils.closeQuietly(wr);
			throw new EngineException("Unable to open connection to "+
		}
		try {
			if ( != null && !.isEmpty()) {
				wr.write("spotter=");
				wr.write(URLEncoder.encode("UTF-8"));
				wr.write('&');
			}
				wr.write("disambiguator=");
				wr.write(URLEncoder.encode("UTF-8"));
				wr.write('&');
			}
				wr.write("types=");
				wr.write(URLEncoder.encode("UTF-8"));
				wr.write('&');
			}
			if ( != null && !.isEmpty()){
				wr.write("support=");
				wr.write(URLEncoder.encode("UTF-8"));
				wr.write('&');
			}
				wr.write("confidence=");
				wr.write(URLEncoder.encode("UTF-8"));
				wr.write('&');
			}
					&&  == null) {
				wr.write("sparql=");
				wr.write(URLEncoder.encode("UTF-8"));
				wr.write('&');
			}
			wr.write("text=");
			wr.write(URLEncoder.encode(text"UTF-8"));
					"The platform does not support encoding " + .name(),e);
catch (IOException e) {
			throw new EngineException("Unable to write 'plain/text' content "
"for ContentItem "+contentItemUri+" to "
finally {
			IOUtils.closeQuietly(wr);
		}
		InputStream is = null;
		Document xmlDoc;
		try {
			// Get Response
			 is = connection.getInputStream();
			xmlDoc = loadXMLFromInputStream(is);
catch (IOException e) {
			throw new EngineException("Unable to spot Entities with"
"Dbpedia Spotlight Spot RESTful Serice running at "
catch(SAXException e) {
			throw new EngineException("Unable to parse Response from "
"Dbpedia Spotlight Spot RESTful Serice running at "
finally {
			IOUtils.closeQuietly(is);
		}
		return CandidateResource.parseCandidates(xmlDoc);
	}
		return Collections.unmodifiableMap(Collections.singletonMap(
	}
New to GrepCode? Check out our FAQ X