diff -U3 -r source.8June2004/src/services/org/opennms/netmgt/outage/OutageConstants.java source/src/services/org/opennms/netmgt/outage/OutageConstants.java --- source.8June2004/src/services/org/opennms/netmgt/outage/OutageConstants.java 2004-06-08 09:24:46.000000000 +1200 +++ source/src/services/org/opennms/netmgt/outage/OutageConstants.java 2004-06-08 12:38:58.000000000 +1200 @@ -27,6 +27,8 @@ package org.opennms.netmgt.outage; +import org.opennms.netmgt.EventConstants; + /** *

This class is a repository for constant, static information concerning the Outage Manager.
  *
@@ -175,7 +177,12 @@
     /**
      * The sql statement used to close all open outages for a nodeid
      */
-    public static final String DB_UPDATE_OUTAGES_FOR_NODE = "UPDATE outages set svcRegainedEventID=?, ifRegainedService=? where (nodeid = ? and (ifRegainedService IS NULL))";
+    public static final String DB_UPDATE_OUTAGES_FOR_NODE = "UPDATE outages set svcRegainedEventID=?, ifRegainedService=? where ((ifRegainedService IS NULL) and svclosteventid=?)";
+
+    /**
+     * The sql statement to find the eventid of the latest nodeDown event for a given node
+     */
+    public static final String DB_GET_NODE_DOWN_EVENT = "SELECT max(eventid) from events where (nodeid = ? AND eventuei='"+EventConstants.NODE_DOWN_EVENT_UEI+"')";
 
     /**
      * The sql statement used to record an event cache hit
diff -U3 -r source.8June2004/src/services/org/opennms/netmgt/outage/OutageWriter.java source/src/services/org/opennms/netmgt/outage/OutageWriter.java
--- source.8June2004/src/services/org/opennms/netmgt/outage/OutageWriter.java 2004-06-08 09:24:46.000000000 +1200
+++ source/src/services/org/opennms/netmgt/outage/OutageWriter.java 2004-06-08 12:37:29.000000000 +1200
@@ -845,6 +845,32 @@
     }
 
     /**
+     * Finds the most recent nodeDown eventid
+     * @param dbConn a connection to use
+     * @param nodeId the node id to find the event for
+     * @return the eventid of the latest nodeDown event for the node, or null if none exists.
+     * @throws SQLException if the query fails; the statement and result set are closed regardless
+     */
+    private Long getNodeDownEventId(Connection dbConn, long nodeId) throws SQLException {
+        PreparedStatement openStmt = dbConn.prepareStatement(OutageConstants.DB_GET_NODE_DOWN_EVENT);
+        try {
+            openStmt.setLong(1, nodeId);
+            ResultSet rs = openStmt.executeQuery();
+            try {
+                // The value must be read while the result set is still open.
+                Long eventId = rs.next() ? new Long(rs.getLong(1)) : null;
+                // max() over zero rows yields one row of SQL NULL, which
+                // getLong() reports as 0 - treat that as "no event found".
+                return (eventId == null || rs.wasNull()) ? null : eventId;
+            } finally {
+                rs.close();
+            }
+        } finally {
+            openStmt.close();
+        }
+    }
+
+    /**
      *

Record the 'nodeUp' event in the outages table - close all open * outage entries for the nodeid in the outages table

*/ @@ -864,9 +890,10 @@ dbConn = DatabaseConnectionFactory.getInstance().getConnection(); int count = 0; + Long nodeDownEventId = getNodeDownEventId(dbConn, nodeID); - if (openOutageExists(dbConn, nodeID)) - { + if (nodeDownEventId != null) + { // Set the database commit mode try @@ -884,7 +911,7 @@ PreparedStatement outageUpdater = dbConn.prepareStatement(OutageConstants.DB_UPDATE_OUTAGES_FOR_NODE); outageUpdater.setLong (1, eventID); outageUpdater.setTimestamp(2, convertEventTimeIntoTimestamp(eventTime)); - outageUpdater.setLong (3, nodeID); + outageUpdater.setLong (3, nodeDownEventId.longValue()); count = outageUpdater.executeUpdate(); // close statement diff -U3 -r source.8June2004/src/services/org/opennms/netmgt/poller/PollableNode.java source/src/services/org/opennms/netmgt/poller/PollableNode.java --- source.8June2004/src/services/org/opennms/netmgt/poller/PollableNode.java 2004-06-08 09:24:46.000000000 +1200 +++ source/src/services/org/opennms/netmgt/poller/PollableNode.java 2004-06-12 10:09:23.000000000 +1200 @@ -1,4 +1,3 @@ -// // Copyright (C) 2002 Sortova Consulting Group, Inc. All rights reserved. // Parts Copyright (C) 1999-2001 Oculan Corp. All rights reserved. // @@ -262,60 +261,112 @@ Category log = ThreadCategory.getInstance(getClass()); Events events = new Events(); - + // Create date object which will serve as the source // for the time on all generated events java.util.Date date = new java.util.Date(); + //log.debug("generateEvents started with m_statusChangedFlag "+m_statusChangedFlag+" and m_status "+m_status); if (m_statusChangedFlag && m_status == Pollable.STATUS_DOWN) { + //log.debug("status changed, and now down - creating node_down"); // create nodeDown event and add it to the event list events.addEvent( createEvent(EventConstants.NODE_DOWN_EVENT_UEI, null, null, date) ); + //Now it's time to check all services. 
It they are up, set them down and set downByNodeDown so + //that when the node comes back up, a seviceRegained will not be sent. + // Do the same for all services which are marked as having a statuschange - they will have just + // been marked down as part of the polling cycle which caused this nodeDown event + // Any services already down but not marked with a statuschange must have been down before - + // do *not* set their downByNodeDown, as they have most likely had a serviceLost event already + // and require a serviceRegained when they come back up (e.g. when the node comes back up) + Iterator i = m_interfaces.values().iterator(); + while (i.hasNext()) { + PollableInterface pIf = (PollableInterface)i.next(); + //log.debug("Checking interface "+pIf.getAddress().getHostAddress()); + Iterator s = pIf.getServices().iterator(); + while (s.hasNext()) { + PollableService pSvc = (PollableService)s.next(); + //log.debug("Checking service "+pSvc.getServiceName()); + if(pSvc.statusChanged() || (pSvc.getStatus()==Pollable.STATUS_UP)) { + //STatus changed (must have been to "down", by definition of a nodedown event) + //log.debug("Service had status changed ("+pSvc.statusChanged()+ + // "), or status was up ("+(pSvc.getStatus()==Pollable.STATUS_UP)+")"); + pSvc.setDownByNodeDown(true); + pSvc.setStatus(Pollable.STATUS_DOWN); + } + } + } + resetStatusChanged(); } else if (m_statusChangedFlag && m_status == Pollable.STATUS_UP) { + //log.debug("status changed, and now up, creating node up event"); // send nodeUp event events.addEvent( createEvent(EventConstants.NODE_UP_EVENT_UEI, null, null, date) ); - resetStatusChanged(); // iterate over the node's interfaces - // if interface status is DOWN + // if interface status has changed and is DOWN // generate interfaceDown event - // else if interface status is UP + // else if interface status has changed and is UP // iterate over interface's services - // if service status is DOWN + // if service status is *has changed*, send the 
appropriate event // generate serviceDown event // Iterator i = m_interfaces.values().iterator(); while (i.hasNext()) { PollableInterface pIf = (PollableInterface)i.next(); - if (pIf.getStatus() == Pollable.STATUS_DOWN) + //log.debug("Checking interface "+pIf.getAddress().getHostAddress()); + if (pIf.statusChanged() && pIf.getStatus() == Pollable.STATUS_DOWN) { + //log.debug("Interface state changed, and is now down, sending interface down event"); events.addEvent( createEvent(EventConstants.INTERFACE_DOWN_EVENT_UEI, pIf.getAddress(), null, date) ); pIf.resetStatusChanged(); } - else if (pIf.getStatus() == Pollable.STATUS_UP) + else if (pIf.statusChanged() && pIf.getStatus() == Pollable.STATUS_UP) { + //log.debug("Interface state changed and is now up, checking services now"); Iterator s = pIf.getServices().iterator(); while (s.hasNext()) { PollableService pSvc = (PollableService)s.next(); - if (pSvc.getStatus() == Pollable.STATUS_DOWN) - { - events.addEvent( createEvent(EventConstants.NODE_LOST_SERVICE_EVENT_UEI, - pIf.getAddress(), - pSvc.getServiceName(), - date) ); - pSvc.resetStatusChanged(); - } + //log.debug("Checking service "+pSvc.getServiceName()); + + //The service was just checked (after finding the interface was up). If it's still down, + // and it was set to down by the node down, then + // send a lost service event, because it's definitely a separate problem + // (e.g. the service didn't come back up properly after a reboot, or something similar) + if ((pSvc.getStatus() == Pollable.STATUS_DOWN) && pSvc.getDownByNodeDown()) + { + //log.debug("Service status is now down, and it was set down by the node going down. 
Sending node lost service event"); + events.addEvent( createEvent(EventConstants.NODE_LOST_SERVICE_EVENT_UEI, + pIf.getAddress(), + pSvc.getServiceName(), + date) ); + } + else if (pSvc.statusChanged() && (pSvc.getStatus() == Pollable.STATUS_UP) && !pSvc.getDownByNodeDown()) + { + //The stauts changed, it's now up, but it *wasn't* set down because the node went down, therefore + // it must have been down before the node went down - send a regained service event + //log.debug("Service status has changed, is now up, and wasn't down by node down - sending regained service event"); + events.addEvent( createEvent(EventConstants.NODE_REGAINED_SERVICE_EVENT_UEI, + pIf.getAddress(), + pSvc.getServiceName(), + date) ); + + } + //node is now up - clear this flag on all services + pSvc.setDownByNodeDown(false); + + //And tidy up for future runs + pSvc.resetStatusChanged(); } } } @@ -337,13 +388,17 @@ // else if status of service changed to DOWN // generate serviceDown event // + //log.debug("Status hasn't changed, and status is still up - standard check for status changes at interface and service level"); Iterator i = m_interfaces.values().iterator(); while (i.hasNext()) { PollableInterface pIf = (PollableInterface)i.next(); + //log.debug("Checking interface "+pIf.getAddress().getHostAddress()); + if (pIf.statusChanged() && pIf.getStatus() == Pollable.STATUS_DOWN) { - events.addEvent( createEvent(EventConstants.INTERFACE_DOWN_EVENT_UEI, + //log.debug("Interface status has changed and is now down, sending interface down event"); + events.addEvent( createEvent(EventConstants.INTERFACE_DOWN_EVENT_UEI, pIf.getAddress(), null, date) ); @@ -351,34 +406,52 @@ } else if (pIf.statusChanged() && pIf.getStatus() == Pollable.STATUS_UP) { + //log.debug("IF status has changed, and is now up - sending up event"); events.addEvent( createEvent(EventConstants.INTERFACE_UP_EVENT_UEI, pIf.getAddress(), null, date) ); pIf.resetStatusChanged(); + //log.debug("Checking services for changes"); 
Iterator s = pIf.getServices().iterator(); while (s.hasNext()) { PollableService pSvc = (PollableService)s.next(); - if (pSvc.getStatus() == Pollable.STATUS_DOWN) - { - events.addEvent( createEvent(EventConstants.NODE_LOST_SERVICE_EVENT_UEI, - pIf.getAddress(), - pSvc.getServiceName(), - date) ); - pSvc.resetStatusChanged(); - } + //log.debug("Checking service "+pSvc.getServiceName()); + + if (pSvc.statusChanged() && pSvc.getStatus() == Pollable.STATUS_DOWN) + { + //log.debug("Service status has changed, and is now down - sending node lost service event"); + events.addEvent( createEvent(EventConstants.NODE_LOST_SERVICE_EVENT_UEI, + pIf.getAddress(), + pSvc.getServiceName(), + date) ); + pSvc.resetStatusChanged(); + } + else if (pSvc.statusChanged() && pSvc.getStatus() == Pollable.STATUS_UP) + { + //log.debug("Service status has changed, and is now up - sending node regained service even"); + events.addEvent( createEvent(EventConstants.NODE_REGAINED_SERVICE_EVENT_UEI, + pIf.getAddress(), + pSvc.getServiceName(), + date) ); + pSvc.resetStatusChanged(); + } } } else { + //log.debug("Last chance - interface status hasn't changed - checking each service explicitly (standard interface check)"); Iterator s = pIf.getServices().iterator(); while (s.hasNext()) { PollableService pSvc = (PollableService)s.next(); + //log.debug("Chekcing service "+pSvc.getServiceName()); + if (pSvc.statusChanged() && pSvc.getStatus() == Pollable.STATUS_DOWN) { + //log.debug("Status has changed, and is now down - sending node lost service event"); events.addEvent( createEvent(EventConstants.NODE_LOST_SERVICE_EVENT_UEI, pIf.getAddress(), pSvc.getServiceName(), @@ -387,6 +460,7 @@ } else if (pSvc.statusChanged() && pSvc.getStatus() == Pollable.STATUS_UP) { + //log.debug("Status has changed, and is now up - sending node regained service even"); events.addEvent( createEvent(EventConstants.NODE_REGAINED_SERVICE_EVENT_UEI, pIf.getAddress(), pSvc.getServiceName(), diff -U3 -r 
source.8June2004/src/services/org/opennms/netmgt/poller/PollableService.java source/src/services/org/opennms/netmgt/poller/PollableService.java --- source.8June2004/src/services/org/opennms/netmgt/poller/PollableService.java 2004-06-08 09:24:46.000000000 +1200 +++ source/src/services/org/opennms/netmgt/poller/PollableService.java 2004-06-12 08:01:24.000000000 +1200 @@ -172,7 +172,15 @@ * mapped by the composite key (package name, service name). */ private static Map SVC_PROP_MAP = Collections.synchronizedMap(new TreeMap()); - + + /** + * Set to true by PollableNode when a nodeDown event is sent, and this service has + * either changed state to down, or is still up. When the node comes up and this is set + * to true, a serviceRegained event will *not* be sent. + * When false, nothing special happens + */ + private boolean m_downByNodeDown; + /** * Constructs a new instance of a pollable service object that is * polled using the passed monitor. The service is scheduled based @@ -284,6 +292,7 @@ public void resetStatusChanged() { + ThreadCategory.getInstance(getClass()).debug("resetStatusChanged()"); m_statusChangedFlag = false; } @@ -300,6 +309,15 @@ m_statusChangeTime = System.currentTimeMillis(); } } + + + public boolean getDownByNodeDown() { + return m_downByNodeDown; + } + + public void setDownByNodeDown(boolean downByNodeDown) { + this.m_downByNodeDown=downByNodeDown; + } public void markAsDeleted() { @@ -999,12 +1017,13 @@ Category log = ThreadCategory.getInstance(getClass()); m_lastPoll = System.currentTimeMillis(); + //log.debug("Poll() start, setting statuschanged to false"); m_statusChangedFlag = false; InetAddress addr = (InetAddress)m_pInterface.getAddress(); if (log.isDebugEnabled()) log.debug("poll: starting new poll for " + addr.getHostAddress() + - "/" + m_service.getName() + "/" + m_package.getName()); + "/" + m_service.getName() + "/" + m_package.getName()+" currentStatus "+m_status); // Poll the interface/service pair via the service monitor // @@ 
-1082,6 +1101,7 @@ { // get the time of the status change // + //log.debug("service "+this.getServiceName()+" status has changed, setting statuschanged to true"); m_statusChangedFlag = true; m_statusChangeTime = System.currentTimeMillis(); @@ -1114,6 +1134,8 @@ break; } } + //} else { + // log.debug("service status "+this.getServiceName()+" has not changed"); } // Set the new status