Logo Questions Linux Laravel Mysql Ubuntu Git Menu
 

Return Number of Errors From Splunk Search in Python

Tags:

python

splunk

Is there any way to get the number of errors that occurred during a Splunk search with the splunklib.results module or any of the splunklib modules?

Below, is my code so far:

#purpose of script: To connect to Splunk, execute a query, and write the query results out to an excel file.
#query results = multiple dynamic # of rows. 7 columns. 

#!/usr/bin/env python
import splunklib.client as client #splunklib.client class is used to connect to splunk, authenticate, and maintain session
import splunklib.results as results #module for returning results and printing/writing them out

listOfAppIDs = []
#open file to read each line and add each line in file to an array. These are our appID's to search
with open('filelocation.txt', 'r') as fi:
    for line in fi:
        listOfAppIDs.append(line.rstrip('\n'))
print listOfAppIDs

#identify variables used to log in
HOST = "8.8.8.8"
PORT = 8089
USERNAME = "uName"
PASSWORD = "pWord"

startPoint = "appID1" #initial start point in array

outputCsv = open('filelocation.csv', 'wb')
fieldnames = ['Application ID', 'transport', 'dst_port', 'Average Throughput per Month','Total Sessions Allowed', 'Unique Source IPs', 'Unique Destination IPs']
writer = csv.DictWriter(outputCsv, fieldnames=fieldnames)
writer.writeheader();

def connect():
    global startPoint , item
    print "startPoint: " + startPoint

    #Create a service instance by using the connect function and log in
    service = client.connect(
        host=HOST,
        port=PORT,
        username=USERNAME,
        password=PASSWORD,
        autologin=True
    )   
    jobs = service.jobs# Get the collection of jobs/searches
    kwargs_blockingsearch = {"exec_mode": "normal"}

    try:
        for item in listOfAppIDs:
            errorCount=0
            print "item: " + item
            if (item >= startPoint):    
                searchquery_blocking = "search splunkQery"
                print item + ':'
                job = jobs.create(searchquery_blocking, **kwargs_blockingsearch) # A blocking search returns query result. Search executes here
                print "Splunk query for appID " , item , " completed! \n"
                resultCount = job["resultCount"] #number of results this job (splunk query) returned
                print "result count " , resultCount
                rr = results.ResultsReader(job.results())
                for result in rr:
                    if isinstance(result, results.Message):
                        # Diagnostic messages may be returned in the results
                        # Check the type and do something.
                        if result.type == log_type:
                            print '%s: %s' % (result.type, result.message)
                            errorCount+=1
                    elif isinstance(result, dict):
                        # Normal events are returned as dicts
                        # Do something with them if required.
                        print result
                        writer.writerow([result + errorCount])
                        pass
                assert rr.is_preview == False
    except:
        print "\nexcept\n"
        startPoint = item #returh to connect function but start where startPoint is at in array
        connect()

   print "done!"    

connect()

I get the following error with the above code:

'OrderedDict' object has no attribute 'messages'

like image 977
pHorseSpec Avatar asked Mar 09 '16 15:03

pHorseSpec


1 Answers

from splunklib import results
my_feed=results.ResultsReader(open("results.xml"))

log_type='ERROR'

n_errors=0
for result in my_feed.results:
    if isinstance(result, results.Message):
       if result.type==log_type:
          print result.message
          n_errors+=1

You may have issues with data.load(), as it requires an XML document with a single root node. If your feed contains multiple results nodes, you can work around this by wrapping the feed, i.e.: "&lt;root&gt;" + open("feed.xml").read() + "&lt;/root&gt;"

If you have access to the raw feed instead of a data object, you may use lxml instead of splunklib:

len( lxml.etree.parse("results.xml").findall("//messages/msg[@type='ERROR']") )

The following is a complete example based on splunklib documentation. ResultsReader parses the atom feed and calls data.load() on each result for you.

      # Connect to Splunk, run a small search, and iterate its results,
      # printing any diagnostic messages whose type matches log_type.
      import splunklib.client as client
      import splunklib.results as results
      from time import sleep

      log_type='ERROR'

      service = client.connect(...)
      job = service.jobs.create("search * | head 5")
      # jobs.create() returns immediately here; poll until the job finishes
      # so job.results() yields final (non-preview) results.
      while not job.is_done():
          sleep(.2)
      rr = results.ResultsReader(job.results())
      for result in rr:
          if isinstance(result, results.Message):
              # Diagnostic messages may be returned in the results
              # Check the type and do something.
              if result.type == log_type:
                 print '%s: %s' % (result.type, result.message)
          elif isinstance(result, dict):
              # Normal events are returned as dicts
              # Do something with them if required.
              pass
      # Final results only -- a preview would mean the job had not finished.
      assert rr.is_preview == False
like image 74
xvan Avatar answered Oct 26 '22 23:10

xvan