Start line:  
End line:  

Snippet Preview

Snippet HTML Code

Stack Overflow Questions
  /*
   * Licensed to the Apache Software Foundation (ASF) under one or more
   * contributor license agreements.  See the NOTICE file distributed with
   * this work for additional information regarding copyright ownership.
   * The ASF licenses this file to You under the Apache License, Version 2.0
   * (the "License"); you may not use this file except in compliance with
   * the License.  You may obtain a copy of the License at
   *
   *     http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
 package org.apache.stanbol.enhancer.engines.dbpspotlight.disambiguate;
 
 import static org.apache.stanbol.enhancer.engines.dbpspotlight.Constants.PARAM_CONFIDENCE;
 import static org.apache.stanbol.enhancer.engines.dbpspotlight.Constants.PARAM_DISAMBIGUATOR;
 import static org.apache.stanbol.enhancer.engines.dbpspotlight.Constants.PARAM_RESTRICTION;
 import static org.apache.stanbol.enhancer.engines.dbpspotlight.Constants.PARAM_SPARQL;
 import static org.apache.stanbol.enhancer.engines.dbpspotlight.Constants.PARAM_SUPPORT;
 import static org.apache.stanbol.enhancer.engines.dbpspotlight.Constants.PARAM_URL_KEY;
 import static org.apache.stanbol.enhancer.engines.dbpspotlight.Constants.UTF8;
 import static org.apache.stanbol.enhancer.engines.dbpspotlight.utils.XMLParser.loadXMLFromInputStream;
 import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.DC_RELATION;
 import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_ENTITY_LABEL;
 import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_ENTITY_REFERENCE;
 import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_ENTITY_TYPE;
 import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_SELECTED_TEXT;
 import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_START;
 import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.RDF_TYPE;
 
 import java.net.URL;
 import java.util.Map;
 
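DBPSpotlightDisambiguateEnhancementEngine provides functionality to disambiguate the existing TextAnnotations of a document by sending them to DBpedia Spotlight and linking the returned DBpedia resources to them as EntityAnnotations.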

Author(s):
Iavor Jelev, Babelmonkeys (GzEvD)
 
 @Component(metatype = true, immediate = true
 	label = "%stanbol.DBPSpotlightDisambiguateEnhancementEngine.name"
 	description = "%stanbol.DBPSpotlightDisambiguateEnhancementEngine.description")
 @Properties(value = { 
 		@Property(name = ., value = "dbpspotlightdisambiguate"),
 		@Property(name = , value = "http://spotlight.dbpedia.org/rest/annotate"),
 		@Property(name = , value = "Document"),
})
Ensures this engine is deactivated in org.apache.stanbol.commons.stanboltools.offline.OfflineMode
The default value for the Execution of this Engine. Currently set to org.apache.stanbol.enhancer.servicesapi.ServiceProperties.ORDERING_PRE_PROCESSING
	public static final Integer defaultOrder =  - 31;

This contains the logger.
	private static final Logger log = LoggerFactory
holds the url of the Spotlight REST endpoint
	private URL spotlightUrl;
holds the chosen disambiguator to be used
holds the type restriction for the results, if the user wishes one
holds the chosen minimal support value
holds the chosen minimal confidence value
holds the sparql restriction for the results, if the user wishes one
holds the existing TextAnnotations, which are used as input for DBpedia Spotlight, and later for linking of the results
    private int connectionTimeout;
Default constructor used by OSGI. It is expected that activate(org.osgi.service.component.ComponentContext) is called before using the instance.
Constructor intended to be used for unit tests

Parameters:
serviceURL
	/**
	 * Constructor intended to be used for unit tests: configures the engine
	 * directly instead of reading the values from an OSGi component context.
	 *
	 * @param serviceURL
	 *            the URL of the Spotlight REST endpoint; stored in
	 *            {@code spotlightUrl}
	 * @param connectionTimeout
	 *            the connection timeout; stored in {@code connectionTimeout}
	 *            (values not greater than zero presumably mean "not
	 *            configured" - confirm against the request code)
	 */
	protected DBPSpotlightDisambiguateEnhancementEngine(URL serviceURL,int connectionTimeout){
		// The assignment targets were missing ("this. = ..."); the matching
		// fields of this class are spotlightUrl (URL) and connectionTimeout (int).
		this.spotlightUrl = serviceURL;
		this.connectionTimeout = connectionTimeout;
	}
Initialize all parameters from the configuration panel, or with their default values

	/**
	 * Activates the engine: delegates to {@code super.activate(ce)} and then
	 * reads the engine configuration from the properties of the component
	 * context.
	 * <p>
	 * NOTE(review): in this copy of the file the assignment targets, the
	 * property keys passed to {@code properties.get()} and parts of the
	 * signature are missing. Presumably the keys are the statically imported
	 * {@code PARAM_*} constants - confirm against the upstream source.
	 *
	 * @param ce
	 *            the component context providing the configuration properties
	 * @throws ConfigurationException
	 *             declared by the signature; which configuration errors
	 *             trigger it is not visible here
	 */
	@SuppressWarnings("unchecked")
	protected void activate(ComponentContext cethrows ConfigurationException,
		super.activate(ce);
		Dictionary<StringObjectproperties = ce.getProperties();
		// Service URL and connection timeout are parsed by the shared
		// SpotlightEngineUtils helpers.
		 = SpotlightEngineUtils.parseSpotlightServiceURL(properties);
         = SpotlightEngineUtils.getConnectionTimeout(properties);
		// Three optional String settings: null when the property is absent.
		 = properties.get() == null ? null
				: (Stringproperties.get();
		 = properties.get() == null ? null
				: (Stringproperties.get();
		 = properties.get() == null ? null
				: (Stringproperties.get();
		// Two String settings that fall back to "-1" when the property is
		// absent.
		 = properties.get() == null ? "-1"
				: (Stringproperties.get();
		 = properties.get() == null ? "-1"
				: (Stringproperties.get();
	}

Check if the content can be enhanced

	/**
	 * Reports whether this engine can enhance the given content item. The
	 * decision is based solely on
	 * {@code SpotlightEngineUtils.canProcess(ci)}.
	 * <p>
	 * NOTE(review): both result operands of the conditional expression are
	 * missing in this copy of the file; restore them from the upstream
	 * source before compiling.
	 *
	 * @param ci
	 *            the content item to check
	 * @return an int code selected by the conditional expression
	 * @throws EngineException
	 *             declared by the signature
	 */
	public int canEnhance(ContentItem cithrows EngineException {
		return SpotlightEngineUtils.canProcess(ci) ?
	}

Calculate the enhancements by doing a POST request to the DBpedia Spotlight endpoint and processing the results

	/**
	 * Computes the enhancements for the given content item: builds the XML
	 * of the spotted TextAnnotations from the item's metadata, posts it to
	 * the Spotlight endpoint via {@code doPostRequest} and, when a non-null
	 * result is returned, adds the resulting enhancements through
	 * {@code createEnhancements}.
	 * <p>
	 * NOTE(review): the lock acquire/release calls referred to by the inline
	 * comments, the declaration of {@code debugStream}, the logger receiver
	 * of {@code .debug(...)} and the catch clause of the inner try are
	 * missing in this copy of the file.
	 *
	 * @param ci
	 *            the content item to enhance
	 * @throws EngineException
	 *             declared by the signature; {@code doPostRequest} is
	 *             documented to throw it when the request cannot be sent
	 */
	public void computeEnhancements(ContentItem cithrows EngineException {
		Language language = SpotlightEngineUtils.getContentLanguage(ci);
		String text = SpotlightEngineUtils.getPlainContent(ci);
		// Retrieve the existing text annotations (requires read lock)
		MGraph graph = ci.getMetadata();
		String xmlTextAnnotations = this.getSpottedXml(textgraph);
		// The content item URI is passed along for error reporting only.
		Collection<AnnotationdbpslGraph = doPostRequest(text,
				xmlTextAnnotationsci.getUri());
		if (dbpslGraph != null) {
			// Acquire a write lock on the ContentItem when adding the
			// enhancements
			try {
				createEnhancements(dbpslGraphcilanguage);
					// Debug aid: serialize the complete metadata graph as
					// RDF/XML and write it to the debug log.
					Serializer serializer = Serializer.getInstance();
					serializer.serialize(debugStreamci.getMetadata(),
							"application/rdf+xml");
					try {
						.debug("DBpedia Enhancements:\n{}",
								debugStream.toString("UTF-8"));
					}
				}
finally {
			}
		}
	}

The method adds the returned DBpedia Spotlight annotations to the content item's metadata. For each DBpedia resource an EntityAnnotation is created and linked to the according TextAnnotation.

Parameters:
occs a Collection of entity information
ci the content item
			ContentItem ciLanguage language) {
		HashMap<ResourceUriRefentityAnnotationMap = new HashMap<ResourceUriRef>();
		for (Annotation occ : occs) {
			if (.get(occ.surfaceForm) != null) {
				UriRef textAnnotation = .get(occ.surfaceForm);
				MGraph model = ci.getMetadata();
				UriRef entityAnnotation = EnhancementEngineHelper
				entityAnnotationMap.put(occ.urientityAnnotation);
				Literal label = new PlainLiteralImpl(occ.surfaceForm.namelanguage);
				model.add(new TripleImpl(entityAnnotation,
						textAnnotation));
				model.add(new TripleImpl(entityAnnotation,
				if (t != null) {
					Iterator<Stringit = t.iterator();
					while (it.hasNext())
						model.add(new TripleImpl(entityAnnotation,
				}
				model.add(new TripleImpl(entityAnnotation,
			}
		}
	}

Sends a POST request to the DBpediaSpotlight url.

Parameters:
text a String with the text to be analyzed
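xmlTextAnnotations the XML representation of the spotted TextAnnotations, sent URL-encoded as the text parameter of the request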
textAnnotations
contentItemUri the URI of the org.apache.stanbol.enhancer.servicesapi.ContentItem (only used for logging in case of an error)
Returns:
the Annotations parsed from the XML response of the server
Throws:
org.apache.stanbol.enhancer.servicesapi.EngineException if the request cannot be sent
			String xmlTextAnnotationsUriRef contentItemUrithrows EngineException {
		HttpURLConnection connection = null;
		BufferedWriter wr = null;
		try {
			connection.setRequestMethod("POST");
			connection.setRequestProperty("Content-Type",
					"application/x-www-form-urlencoded");
			connection.setRequestProperty("Accept""text/xml");
            //set ConnectionTimeout (if configured)
            if( > 0){
                connection.setConnectTimeout(*1000);
                connection.setReadTimeout(*1000);
            }
            connection.setUseCaches(false);
			connection.setDoInput(true);
			connection.setDoOutput(true);
			// Send request
					connection.getOutputStream(),));
catch (IOException e) {
			IOUtils.closeQuietly(wr);
			throw new EngineException("Unable to open connection to "+
		}
		try {
			wr.write("spotter=SpotXmlParser&");
				wr.write("disambiguator=");
				wr.write(URLEncoder.encode("UTF-8"));
				wr.write('&');
			}
				wr.write("types=");
				wr.write(URLEncoder.encode("UTF-8"));
				wr.write('&');
			}
			if ( != null && !.isEmpty()) {
				wr.write("support=");
				wr.write(URLEncoder.encode("UTF-8"));
				wr.write('&');
			}
				wr.write("confidence=");
				wr.write(URLEncoder.encode("UTF-8"));
				wr.write('&');
			}
					&&  == null) {
				wr.write("sparql=");
				wr.write(URLEncoder.encode("UTF-8"));
				wr.write('&');
			}
			wr.write("text=");
			wr.write(URLEncoder.encode(xmlTextAnnotations"UTF-8"));
					"The platform does not support encoding " + .name(),e);
catch (IOException e) {
			throw new EngineException("Unable to write 'plain/text' content "
"for ContentItem "+contentItemUri+" to "
finally {
			IOUtils.closeQuietly(wr);
		}
		InputStream is = null;
		Document xmlDoc;
		try {
			// Get Response
			 is = connection.getInputStream();
			xmlDoc = loadXMLFromInputStream(is);
catch (IOException e) {
			throw new EngineException("Unable to spot Entities with"
"Dbpedia Spotlight Annotate RESTful Serice running at "
catch(SAXException e) {
			throw new EngineException("Unable to parse Response from "
"Dbpedia Spotlight Annotate RESTful Serice running at "
finally {
			IOUtils.closeQuietly(is);
		}
		return Annotation.parseAnnotations(xmlDoc);
	}
	/**
	 * Builds the XML document describing the already spotted surface forms.
	 * The result has the shape
	 * {@code <annotation text="..."><surfaceForm name="..." offset="..."/>...</annotation>}
	 * with one {@code surfaceForm} element per triple subject for which a
	 * non-null surface form string is found in the graph. Each surface form
	 * is also stored in a map together with the subject it was read from.
	 * <p>
	 * NOTE(review): the declaration of {@code xml}, the remaining arguments
	 * of {@code graph.filter(...)} and {@code getString(...)}, the loop
	 * header and the receiver of {@code .put(...)} are missing in this copy
	 * of the file.
	 * <p>
	 * NOTE(review): {@code text} and {@code surfaceForm} are inserted into
	 * XML attribute values via {@code String.format} with no escaping
	 * visible here; characters such as {@code "}, {@code <} or {@code &}
	 * would produce malformed XML - confirm and escape if needed.
	 *
	 * @param text
	 *            the plain text, written into the {@code text} attribute of
	 *            the root element
	 * @param graph
	 *            the metadata graph that is searched for surface forms
	 * @return the assembled XML as a String
	 */
	private String getSpottedXml(String textMGraph graph) {
		xml.append(String.format("<annotation text=\"%s\">"text));
		try {
			for (Iterator<Tripleit = graph.filter(null,
				// Triple tAnnotation = it.next();
				UriRef uri = (UriRefit.next().getSubject();
				String surfaceForm = EnhancementEngineHelper.getString(graph,
				// Subjects without a surface form are skipped.
				if (surfaceForm != null) {
					String offset = EnhancementEngineHelper.getString(graph,
					// Remember which subject the surface form came from.
					.put(surfaceFormuri);
					xml.append(String.format(
							"<surfaceForm name=\"%s\" offset=\"%s\"/>",
							surfaceFormoffset));
				}
			}
		// NOTE(review): this handler is empty in this copy, so any exception
		// raised above would be silently dropped and partial XML returned;
		// confirm whether logging was lost in extraction.
catch (Exception e) {
		}
		return xml.append("</annotation>").toString();
	}
		return Collections.unmodifiableMap(Collections.singletonMap(
	}
New to GrepCode? Check out our FAQ X