Start line:  
End line:  

Snippet Preview

Snippet HTML Code

Stack Overflow Questions
  /*
   * Licensed to the Apache Software Foundation (ASF) under one or more
   * contributor license agreements.  See the NOTICE file distributed with
   * this work for additional information regarding copyright ownership.
   * The ASF licenses this file to You under the Apache License, Version 2.0
   * (the "License"); you may not use this file except in compliance with
   * the License.  You may obtain a copy of the License at
   *
   *      http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
 package org.apache.catalina.valves;
 
 import static org.jboss.web.CatalinaMessages.MESSAGES;
 
 import java.util.Map;
 
 
Web crawlers can trigger the creation of many thousands of sessions as they crawl a site which may result in significant memory consumption. This Valve ensures that crawlers are associated with a single session - just like normal users - regardless of whether or not they provide a session token with their requests.
 
 public class CrawlerSessionManagerValve extends ValveBase
         implements LifecycleHttpSessionBindingListener {

    
The lifecycle event support for this component.
 
     protected LifecycleSupport lifecycle = new LifecycleSupport(this);
     protected boolean started = false;
 
     private final Map<String,StringclientIpSessionId =
         new ConcurrentHashMap<StringString>();
     private final Map<String,StringsessionIdClientIp =
         new ConcurrentHashMap<StringString>();
 
     private String crawlerUserAgents =
         ".*[bB]ot.*|.*Yahoo! Slurp.*|.*Feedfetcher-Google.*";
     private Pattern uaPattern = null;
     private int sessionInactiveInterval = 60;


    
Specify the regular expression (using java.util.regex.Pattern) that will be used to identify crawlers based on the User-Agent header provided. The default is ".*[bB]ot.*|.*Yahoo! Slurp.*|.*Feedfetcher-Google.*"

Parameters:
crawlerUserAgents The regular expression using java.util.regex.Pattern
 
     public void setCrawlerUserAgents(String crawlerUserAgents) {
         this. = crawlerUserAgents;
         if (crawlerUserAgents == null || crawlerUserAgents.length() == 0) {
              = null;
         } else {
              = Pattern.compile(crawlerUserAgents);
         }
     }

    

Returns:
The current regular expression being used to match user agents.
See also:
setCrawlerUserAgents(java.lang.String)
 
     public String getCrawlerUserAgents() {
         return ;
     }


    
Specify the session timeout (in seconds) for a crawler's session. This is typically lower than that for a user session. The default is 60 seconds.

Parameters:
sessionInactiveInterval The new timeout for crawler sessions
 
     public void setSessionInactiveInterval(int sessionInactiveInterval) {
         this. = sessionInactiveInterval;
    }

    

Returns:
The current timeout in seconds
See also:
setSessionInactiveInterval(int)
    public int getSessionInactiveInterval() {
        return ;
    }
    public Map<String,StringgetClientIpSessionId() {
        return ;
    }
    // ------------------------------------------------------ Lifecycle Methods
    public void addLifecycleListener(LifecycleListener listener) {
        .addLifecycleListener(listener);
    }
        return .findLifecycleListeners();
    }
    public void removeLifecycleListener(LifecycleListener listener) {
        .removeLifecycleListener(listener);
    }
    public void start() throws LifecycleException {
        // Validate and update our current component state
        if ()
            throw new LifecycleException(.valveAlreadyStarted());
         = true;
         = Pattern.compile();
    }
    public void stop() throws LifecycleException {
        // Validate and update our current component state
        if (!)
            throw new LifecycleException(.valveNotStarted());
        .fireLifecycleEvent(null);
         = false;
        
    }
    
    /**
     * Associates crawler requests with a single session per client IP.
     *
     * <p>When the request has no valid session, the single User-Agent header
     * (if exactly one is present) is matched against a pattern. On a match the
     * request is treated as a bot: a session ID is looked up by the client's
     * remote address and, if found, set as the requested session ID. After the
     * next valve has run, a bot that had no known session but now has one gets
     * its IP/session mapping recorded, this valve bound as a session attribute
     * and the session's max inactive interval set.
     *
     * <p>NOTE(review): this text has lost identifiers during extraction
     * (e.g. {@code ..debug}, {@code .matcher}, {@code .get}, {@code .put},
     * the separators in the parameter list) and does not compile as shown.
     * The missing receivers are presumably the fields uaPattern,
     * clientIpSessionId, sessionIdClientIp and sessionInactiveInterval, plus a
     * two-part logger reference - confirm against the upstream source.
     */
    @Override
    public void invoke(Request requestResponse responsethrows IOException,
            ServletException {
        boolean isBot = false;
        String sessionId = null;
        String clientIp = null;
        // NOTE(review): the opening "if (<logger>.isDebugEnabled()) {" for the
        // closing brace below appears to be missing from the extracted text.
            ..debug(request.hashCode() + ": ClientIp=" +
                    request.getRemoteAddr() + ", RequestedSessionId=" +
                    request.getRequestedSessionId());
        }
        // If the incoming request has a valid session ID, no action is required
        if (request.getSession(false) == null) {
            // Is this a crawler - check the UA headers
            Enumeration<StringuaHeaders = request.getHeaders("user-agent");
            String uaHeader = null;
            if (uaHeaders.hasMoreElements()) {
                uaHeader = uaHeaders.nextElement();
            }
            // If more than one UA header - assume not a bot
            if (uaHeader != null && !uaHeaders.hasMoreElements()) {
                if (..isDebugEnabled()) {
                    ..debug(request.hashCode() + ": UserAgent=" + uaHeader);
                }
                // The whole header value must match (matches(), not find()).
                // NOTE(review): receiver is presumably uaPattern - confirm.
                if (.matcher(uaHeader).matches()) {
                    isBot = true;
                    if (..isDebugEnabled()) {
                        ..debug(request.hashCode() +
                                ": Bot found. UserAgent=" + uaHeader);
                    }
                }
            }
            // If this is a bot, is the session ID known?
            if (isBot) {
                clientIp = request.getRemoteAddr();
                // NOTE(review): lookup keyed by client IP; receiver is
                // presumably clientIpSessionId - confirm.
                sessionId = .get(clientIp);
                if (sessionId != null) {
                    // Make downstream processing see the bot's existing session.
                    request.setRequestedSessionId(sessionId);
                    if (..isDebugEnabled()) {
                        ..debug(request.hashCode() + ": SessionID=" +
                                sessionId);
                    }
                }
            }
        }
        // Hand the request on to the next valve before inspecting the outcome.
        getNext().invoke(requestresponse);
        if (isBot) {
            if (sessionId == null) {
                // Has bot just created a session, if so make a note of it
                HttpSession s = request.getSession(false);
                if (s != null) {
                    // NOTE(review): presumably
                    // clientIpSessionId.put(clientIp, s.getId()) followed by
                    // sessionIdClientIp.put(s.getId(), clientIp) - confirm.
                    .put(clientIps.getId());
                    .put(s.getId(), clientIp);
                    // #valueUnbound() will be called on session expiration
                    s.setAttribute(this.getClass().getName(), this);
                    // NOTE(review): argument is missing; presumably
                    // sessionInactiveInterval - confirm.
                    s.setMaxInactiveInterval();
                    if (..isDebugEnabled()) {
                        ..debug(request.hashCode() +
                                ": New bot session. SessionID=" + s.getId());
                    }
                }
            } else {
                if (..isDebugEnabled()) {
                    ..debug(request.hashCode() +
                            ": Bot session accessed. SessionID=" + sessionId);
                }
            }
        }
    }
    @Override
    public void valueBound(HttpSessionBindingEvent event) {
        // NOOP
    }
    @Override
    public void valueUnbound(HttpSessionBindingEvent event) {
        String clientIp = .remove(event.getSession().getId());
        if (clientIp != null) {
            .remove(clientIp);
        }
    }
New to GrepCode? Check out our FAQ X