# www.jquigley.com
# Fri Aug 10 21:19:32 CDT 2007

import os
import re
import time
import logging

from paste import request
from paste import wsgiwrappers

from search.chump import ChumpQuery
from search.engine import DbxmlEngine
from search.engine import InotifyWatcher
import toil

#------------------------------------------------------------------------------
# Search application

class Application(toil.ApplicationInterface):
    """WSGI search application: serves page templates and search results.

    Requests are dispatched on the request path relative to the configured
    ``web.root``; ``/find`` results are produced by the search engine and
    returned as XML, everything else is served from the loaded templates.
    """

    logger = logging.getLogger('Application')

    def __init__(self, conf):
        """Build the engine and the corpus watcher from the configuration.

        conf -- configuration object; this class calls its ``get``,
                ``getboolean`` and ``items`` methods (ConfigParser-like).
                Reads ``[TOIL] debug`` and ``[APP] corpus.regex``,
                ``path.container`` and ``path.corpus``.
        """
        self.conf = conf
        self.debug = conf.getboolean('TOIL', 'debug')
        self.corpus = list()
        self.templates = dict()
        self.isInitialized = False

        # Predicate shared by the engine and the watcher: accepts existing
        # regular files whose path matches the configured corpus regex.
        # The compiled pattern is kept as a function attribute.
        def filter_fun(path):
            return os.path.isfile(path) and filter_fun.regexp.search(path)
        filter_fun.regexp = re.compile(self.conf.get('APP', 'corpus.regex'))

        self.engine = DbxmlEngine(conf.get('APP', 'path.container'),
                                  ChumpQuery,
                                  filter_fun)
        self.watcher = InotifyWatcher(self.conf.get('APP', 'path.corpus'),
                                      self.engine,
                                      filter_fun)

    def initialize(self):
        """Initialize the engine, load templates, index the corpus and
        start the watcher; sets ``isInitialized`` once all steps succeed."""
        self.engine.initialize()
        self._loadTemplates()
        #TODO: self.logger.trace('successfully loaded templates: ' + repr(self.templates))

        # TODO: allow path.corpus to specify multiple paths, separated by ":"
        self.engine.indexCorpus(self.conf.get('APP', 'path.corpus'))
        self.watcher.start()
        self.isInitialized = True

    def handler(self, environ, start_response):
        """Handle one WSGI request.

        environ        -- WSGI environment dictionary
        start_response -- WSGI start_response callable

        Returns the result of invoking the paste ``WSGIResponse`` with
        ``environ`` and ``start_response``.
        Raises NotImplementedError for the ``/stats`` path, and KeyError
        if a required template has not been loaded.
        """
        start_time = time.time()

        # The peer address/port are only used for the log line, and are not
        # guaranteed to be present in every WSGI environment, so a missing
        # key must not fail the request.
        host = environ.get('REMOTE_ADDR', '-')
        port = environ.get('REMOTE_PORT', '-')

        # SCRIPT_URL is not a standard WSGI key; when the server does not
        # provide it, fall back to the standard SCRIPT_NAME + PATH_INFO.
        req = environ.get('SCRIPT_URL')
        if req is None:
            req = environ.get('SCRIPT_NAME', '') + environ.get('PATH_INFO', '')

        root = self.conf.get('APP', 'web.root')

        self.logger.info('handling request %s from %s:%s', req, host, port)
        #self.logger.trace('request environ: %(environ)s' % locals())

        # reload templates with every request when we're in debug mode
        if self.debug:
            self._loadTemplates()

        # dispatch on the URL, default response is the index
        # TODO: improve this with chump-specific implementations of toil request handlers
        isxml = False
        if req == root + '/advanced':
            content = self.templates['advanced']
        elif req == root + '/find':
            isxml = True
            content = self.engine.find(request.parse_querystring(environ))
        elif req == root + '/stats':
            raise NotImplementedError("statistics not implemented")
        else:
            content = self.templates['index']

        response = wsgiwrappers.WSGIResponse()

        # wrap the content with headers and footers and
        # set content-type depending on type of output
        if isxml:
            content = self.templates['xml-header'] + content + self.templates['xml-footer']
            response.headers['Content-Type'] = 'text/xml'
        else:
            content = self.templates['header'] + content + self.templates['footer']

        response.write(content)
        self.logger.debug('request processing duration: %fs',
                          time.time() - start_time)
        return response(environ, start_response)

    def shutdown(self):
        """Sync the engine, then stop the watcher and wait for it to exit."""
        self.logger.info('caught shutdown signal')
        # TODO: is there anything else that needs to be done to ensure clean dbxml term?
        self.engine.sync()
        self.watcher.stop()
        self.watcher.join()

    def _loadTemplates(self):
        """(Re)load page templates into ``self.templates``.

        For every (key, value) option in [APP] whose key starts with
        ``path.tmpl`` and whose value is an existing file, the file contents
        are stored under the last dot-separated component of the key.
        """
        for key, path in self.conf.items('APP'):
            # Literal prefix test: the previous regex '^path.tmpl' left the
            # dot unescaped and so matched any character in that position.
            if not key.startswith('path.tmpl'):
                continue
            if not os.path.isfile(path):
                continue

            name = key.split('.')[-1]
            fin = open(path)
            try:
                # close the handle even if the read fails
                self.templates[name] = fin.read()
            finally:
                fin.close()
            #self.logger.trace('loaded template: ' + os.path.basename(path))
        self.logger.debug('successfully loaded page templates')

# EOF