diff -uNr a/logotron/MANIFEST.TXT b/logotron/MANIFEST.TXT
--- a/logotron/MANIFEST.TXT false
+++ b/logotron/MANIFEST.TXT 824bfece7088516aaadfdb1d0e37fbae915602d0f5ded55cbc39401af9ceefa8a62f8bf04b35b53811d18a4ec8fea553571a384bf9fdda7d4e76bf31a0d7190d
@@ -0,0 +1 @@
+589248 logotron_genesis "Genesis."
diff -uNr a/logotron/README.txt b/logotron/README.txt
--- a/logotron/README.txt false
+++ b/logotron/README.txt 2209f578062ecc5636e67fcb9904cd26204c832ffd170b373773b6b6e1bd1fb85e79e99be44a49bb1aabc99cdae4f6df4cef203bdadbe3773c3586048a61888f
@@ -0,0 +1,73 @@
+This is the Aug 2019 draft of the S.NSA WWW logotron and IRC bot kit,
+as can be seen presently at http://logs.nosuchlabs.com/log .
+
+To make your own installation, you will need:
+
+(1) Traditional 'python' 2.7.
+(2) 'flask' lib for (1).
+(3) 'psycopg2' lib for (1).
+(4) 'postgres' (9 or 10).
+(5) A WWW server that knows how to proxy.
+
+To use the kit, you will first need to create a user and DB, e.g.:
+
+su - postgres
+psql
+
+create user nsabot createdb;
+alter role nsabot superuser;
+createdb nsalog;
+grant all privileges on database nsalog to nsabot ;
+
+... you can take 'super' away from this user after the 1st run;
+it is needed in order to let him load the pg_trgm indexer
+plugin.
+
+Next, run 'init_db.sh' (alter the constants to match the
+names of your postgres user and the DB); this creates the schema.
+
+Then see 'eat.sh' and the 'eat_dump.py' it uses, re how to
+fill your log archive DB. 'eat_dump.py' eats dumps in Phf's classical
+format, e.g.:
+
+1926177;1564727032;mp_en_viaje;in the meantime, everyone's invited on trilema & other blogs.
+
+where 1926177 is the absolute line index (in the given chan), 1564727032
+is the unix epochal timestamp, mp_en_viaje is the speaker (if he is
+'actioning', there will be a * behind his name), and the remainder
+of the line is the payload (a short parsing sketch follows this README).
+
+You WILL need to adjust the constants in 'eat_dump.py'; it is not
+currently capable of eating a config file. Set these to your DB
+and PG user.
+
+Now, adjust the constants in 'nsabot.conf' (rename per taste)
+to specify your IRC params, name of bot, host at which the www
+logger will reside, and other knob values.
+
+Adjust the three 'flask' templates in the 'templates' subdir to
+give the desired look and feel for the www end. Currently we are using
+Phf's classic style sheet, with minor modifications.
+
+'reader.py' takes one mandatory command-line argument: the full path
+to the config above. Same for 'bot.py', which is the IRC bot.
+
+Run these via e.g. nohup ./bot.py & ; nohup ./reader.py &
+and let your proxying WWW server know how to reach the latter's port.
+
+For bot.py you will need a registered nick on fleanode (or wherever
+it is used.) There are no fleanode-specific hacks in the bot, ergo
+it can be stood up behind ZNC (although this has not been tested.)
+
+Certain important features are presently unimplemented, in no particular order:
+(1) Backlinkage.
+(2) Search result pagination.
+(3) Double-quoted search terms.
+(4) Paste archiving.
+(5) Multi-headed IRC bot for weather resistance.
+(6) 'Ecologically clean' export of raw log material.
+(7) Informative eggogology for bot commands.
+(8) Automatic synchronization with mirrors (see 6).
+
+A ZNC log eater is also required, to properly fill in the archives.
+This is not yet available at the time of this writing.
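
For illustration only (this sketch is not part of the patch): in python 2.7,
the dump line shown in the README splits via the same regex 'eat_dump.py' uses:

import re
from datetime import datetime

line = ("1926177;1564727032;mp_en_viaje;in the meantime, "
        "everyone's invited on trilema & other blogs.")
idx, stamp, speaker, payload = re.search(
    "(\d+)\;(\d+)\;([^;]+)\;(.*$)", line).groups()
print int(idx)                            # 1926177 : absolute line index
print datetime.fromtimestamp(int(stamp))  # 2019-08-02 ... (local time)
print speaker                             # mp_en_viaje
print payload                             # the spoken text
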
diff -uNr a/logotron/bot.py b/logotron/bot.py --- a/logotron/bot.py false +++ b/logotron/bot.py a168b17f05e71bec4ed700eda6ae588d080f41d1b93842044709fc8349d1a44116692fcccdc10861dc1e4e317f02f7456e70dcf66945fd9451d83b83f09a85e4 @@ -0,0 +1,446 @@ +#!/usr/bin/python + +import ConfigParser, sys, logging, socket, time, re, requests, urllib +from urllib import quote + +# DBism +import psycopg2, psycopg2.extras +import psycopg2.extensions +psycopg2.extensions.register_type(psycopg2.extensions.UNICODE) +psycopg2.extensions.register_type(psycopg2.extensions.UNICODEARRAY) +import time, datetime +from datetime import datetime + +############################################################################## + +cfg = ConfigParser.ConfigParser() + +############################################################################## + +# Single mandatory arg: config file path +if len(sys.argv[1:]) != 1: + # If no args, print usage and exit: + print sys.argv[0] + " CONFIG" + exit(0) + +# Read Config +cfg.readfp(open(sys.argv[1])) + +# Get log path +logpath = cfg.get("bofh", "log") + +# Get IRCism debug toggle +irc_dbg = cfg.get("irc", "irc_dbg") +if irc_dbg == 1: + log_lvl = logging.DEBUG +else: + log_lvl = logging.INFO + +# Init logo +logging.basicConfig(filename=logpath, filemode='a', level=log_lvl, + format='%(asctime)s %(levelname)s %(message)s', + datefmt='%d-%b-%y %H:%M:%S') + +# Date format used in log lines +Date_Short_Format = "%Y-%m-%d" + +# Date format used in echoes +Date_Long_Format = "%Y-%m-%d %H:%M:%S" + +############################################################################## +# Get the remaining knob values: + +try: + # IRCism: + Buf_Size = int(cfg.get("tcp", "bufsize")) + Timeout = int(cfg.get("tcp", "timeout")) + TX_Delay = float(cfg.get("tcp", "t_delay")) + Servers = [x.strip() for x in cfg.get("irc", "servers").split(',')] + Port = int(cfg.get("irc", "port")) + Nick = cfg.get("irc", "nick") + Pass = cfg.get("irc", "pass") + Channels = [x.strip() for x in cfg.get("irc", "chans").split(',')] + Join_Delay = int(cfg.get("irc", "join_t")) + Prefix = cfg.get("control", "prefix") + # DBism: + DB_Name = cfg.get("db", "db_name") + DB_User = cfg.get("db", "db_user") + DB_DEBUG = cfg.get("db", "db_debug") + # Logism: + Base_URL = cfg.get("logotron", "base_url") + Era = int(cfg.get("logotron", "era")) + NewChan_Idx = int(cfg.get("logotron", "newchan_idx")) + Src_URL = cfg.get("logotron", "src_url") + +except Exception as e: + print "Invalid config: ", e + exit(1) + +############################################################################## + +# Connect to the given DB +try: + db = psycopg2.connect("dbname=%s user=%s" % (DB_Name, DB_User)) +except Exception: + print "Could not connect to DB!" 
+ logging.error("Could not connect to DB!") + exit(1) +else: + logging.info("Connected to DB!") + +############################################################################## + +def close_db(): + db.close() + +def exec_db(query, args=()): + cur = db.cursor(cursor_factory=psycopg2.extras.RealDictCursor) + if (DB_DEBUG): logging.debug("query: '{0}'".format(query)) + if (DB_DEBUG): logging.debug("args: '{0}'".format(args)) + cur.execute(query, args) + +def query_db(query, args=(), one=False): + cur = db.cursor(cursor_factory=psycopg2.extras.RealDictCursor) + if (DB_DEBUG): logging.debug("query: '{0}'".format(query)) + cur.execute(query, args) + rv = cur.fetchone() if one else cur.fetchall() + if (DB_DEBUG): logging.debug("query res: '{0}'".format(rv)) + return rv + +def rollback_db(): + cur = db.cursor(cursor_factory=psycopg2.extras.RealDictCursor) + cur.execute("ROLLBACK") + db.commit() + +def commit_db(): + cur = db.cursor(cursor_factory=psycopg2.extras.RealDictCursor) + db.commit() + + +############################################################################## +# IRCism +############################################################################## + +# Used to compute 'uptime' +time_last_conn = datetime.now() + +# Init socket: +sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + +# Set keepalive: +sock.setsockopt(socket.SOL_SOCKET, socket.SO_KEEPALIVE, 1) + +# Initially we are not connected to anything +connected = False + +# Connect to given host:port; return whether connected +def connect(host, port): + logging.info("Connecting to %s:%s" % (host, port)) + sock.settimeout(Timeout) + try: + sock.connect((host, port)) + except (socket.timeout, socket.error) as e: + logging.warning(e) + return False + except Exception as e: + logging.exception(e) + return False + else: + logging.info("Connected.") + return True + + +# Attempt connect to each of hosts, in order, on port; return whether connected +def connect_any(hosts, port): + for host in hosts: + if connect(host, port): + return True + return False + + +# Transmit IRC message +def send(message): + global connected + if not connected: + logging.warning("Tried to send while disconnected?") + return False + time.sleep(TX_Delay) + logging.debug("> '%s'" % message) + message = "%s\r\n" % message + try: + sock.send(message.encode("utf-8")) + except (socket.timeout, socket.error) as e: + logging.warning("Socket could not send! Disconnecting.") + connected = False + return False + except Exception as e: + logging.exception(e) + return False + + +# Speak given message on a selected channel +def speak(channel, message): + send("PRIVMSG #%s :%s" % (channel, message)) + # Now save what the bot spoke: + save_line(datetime.now(), channel, Nick, False, message) + + +# Standard incoming IRC line (excludes fleanode liquishit, etc) +irc_line_re = re.compile("""^:([^!]+)\!\S+\s+PRIVMSG\s+\#(\S+)\s+\:(.*)""") + +# The '#' prevents interaction via PM, this is not a PM-able bot. 
+ +# 'Actions' +irc_act_re = re.compile(""".*ACTION\s+(.*)""") + + +# A line was received from IRC +def received_line(line): + # Process the traditional pingpong + if line.startswith("PING"): + send("PONG " + line.split()[1]) + else: + logging.debug("< '%s'" % line) + standard_line = re.search(irc_line_re, line) + if standard_line: + # Break this line into the standard segments + (user, chan, text) = [s.strip() for s in standard_line.groups()] + # Determine whether this line is an 'action' : + action = False + act = re.search(irc_act_re, line) + if act: + action = True + text = act.group(1) + # This line is edible, process it. + eat_logline(user, chan, text, action) + + +# IRCate until we get disconnected +def irc(): + global connected + + # Connect to one among the specified servers, in given priority : + while not connected: + connected = connect_any(Servers, Port) + + # Save time of last successful connect + time_last_conn = datetime.now() + + # Auth to server + send("NICK %s\r\n" % Nick) + send("USER %s %s %s :%s\r\n" % (Nick, Nick, Nick, Nick)) + send("NICKSERV IDENTIFY %s %s\r\n" % (Nick, Pass)) + + time.sleep(Join_Delay) # wait to join until fleanode eats auth + + # Join selected channels + for chan in Channels: + logging.info("Joining channel '%s'..." % chan) + send("JOIN #%s\r\n" % chan) + + while connected: + try: + data = sock.recv(Buf_Size) + except socket.timeout as e: + logging.debug("Listen timed out") + continue + except socket.error as e: + logging.warning("Listen socket error, disconnecting.") + connected = False + continue + except Exception as e: + logging.exception(e) + connected = False + continue + else: + if len(data) == 0: + logging.warning("Listen socket closed, disconnecting.") + connected = False + continue + try: + data = data.strip(b'\r\n').decode("utf-8") + for l in data.splitlines(): + received_line(l) + continue + except Exception as e: + logging.exception(e) + continue + +############################################################################## + +html_escape_table = { + "&": "&", + '"': """, + "'": "'", + ">": ">", + "<": "<", +} + +def html_escape(text): + res = ("".join(html_escape_table.get(c,c) for c in text)) + return urllib.quote(res.encode('utf-8')) + + +searcher_re = re.compile("""(\d+) Results""") + +# Retrieve a search result count using the WWWistic frontend. +# This way it is not necessary to have query parser in two places. +# However it is slightly wasteful of CPU (requires actually loading results.) +def get_search_res(chan, query): + try: + esc_q = html_escape(query) + url = Base_URL + "log-search?q=" + esc_q + "&chan=" + chan + res = requests.get(url).text + t = res[res.find('') + 7 : res.find('')].strip() + found = searcher_re.match(t) + if found: + output = "[" + url + "]" + "[" + found.group(1) + output += """ results for "%s" in #%s]""" % (query, chan) + return output + else: + return """No results found for "%s" in #%s""" % (query, chan) + except Exception as e: + logging.exception(e) + return "No results returned (is logotron WWW up ?)" + +############################################################################## + +# Commands: + +def cmd_help(arg, user, chan): + # Speak the 'help' text + speak(chan, "%s: my valid commands are: %s" % + (user, ', '.join(Commands.keys()))); + +def cmd_search(arg, user, chan): + logging.debug("search: '%s'" % arg) + speak(chan, get_search_res(chan, arg)) + +def cmd_seen(arg, user, chan): + speak(chan, "%s: this command is not yet implemented." 
% user); + +def cmd_src(arg, user, chan): + speak(chan, "%s: my source code can be seen at: %s" % (user, Src_URL)); + +def cmd_uptime(arg, user, chan): + uptime_txt = "" + uptime = (datetime.now() - time_last_conn) + days = uptime.days + hours = uptime.seconds/3600 + minutes = (uptime.seconds%3600)/60 + uptime_txt += '%dd ' % days + uptime_txt += '%dh ' % hours + uptime_txt += '%dm' % minutes + # Speak the uptime + speak(chan, "%s: time since my last reconnect : %s" % + (user, uptime_txt)); + +Commands = { + "help" : cmd_help, + "s" : cmd_search, + "seen" : cmd_seen, + "uptime" : cmd_uptime, + "src" : cmd_src +} + +############################################################################## + +# Save given line to perma-log +def save_line(time, chan, speaker, action, payload): + ## Put in DB: + try: + # Get index of THIS new line to be saved + last_idx = query_db( + '''select idx from loglines where chan=%s + and idx = (select max(idx) from loglines where chan=%s) ;''', + [chan, chan], one=True) + + # Was this chan unseen previously? + if last_idx == None: + cur_idx = NewChan_Idx # Then use the config'd start index + else: + cur_idx = last_idx['idx'] + 1 # Otherwise, get the next idx + + logging.debug("Adding log line with index: %s" % cur_idx) + + # Set up the insert + exec_db('''insert into loglines (idx, t, chan, era, + speaker, self, payload) values (%s, %s, %s, %s, %s, %s, %s) ; ''', + [cur_idx, time, chan, Era, speaker, action, payload]) + + # Fire + commit_db() + except Exception as e: + rollback_db() + logging.warning("DB add failed, rolled back.") + logging.exception(e) + + +# RE for finding log refs +logref_re = re.compile(Base_URL + """log\/([^/]+)/([^/]+)#(\d+)""") + + +# All valid received lines end up here +def eat_logline(user, chan, text, action): + # If somehow received line from channel which isn't in the set: + if chan not in Channels: + logging.warning( + "Received martian : '%s' : '%s'" % (chan, text)) + return + + # First, add the line to the log: + save_line(datetime.now(), chan, user, action, text) + + # Then, see if the line was a command for this bot: + if text.startswith(Prefix): + cmd = text.partition(Prefix)[2].strip() + cmd = [x.strip() for x in cmd.split(' ', 1)] + if len(cmd) == 1: + arg = "" + else: + arg = cmd[1] + # Dispatch this command... + command = cmd[0] + logging.debug("Dispatching command '%s' with arg '%s'.." 
% + (command, arg)) + func = Commands.get(command) + # If this command is undefined: + if func == None: + logging.debug("Invalid command: %s" % command) + # Utter the 'help' text as response to the sad command + cmd_help("", user, chan) + else: + # Is defined command, dispatch it: + func(arg, user, chan) + else: + # Finally, see if contains log refs: + for ref in re.findall(logref_re, text): + ref_chan, ref_date, ref_idx = ref + # Find this line in DB: + ref_line = query_db( + '''select t, speaker, payload from loglines + where chan=%s and idx=%s;''', + [ref_chan, ref_idx], one=True) + # If retrieved line is valid, echo it: + if ref_line != None: + time_txt = ref_line['t'].strftime(Date_Long_Format) + my_line = "Logged on %s %s: %s" % (time_txt, + ref_line['speaker'], + ref_line['payload']) + # Speak the line echo into the chan where ref was seen + speak(chan, my_line) + +############################################################################## + +# IRCate; if disconnected, reconnect +def run(): + while 1: + irc() + logging.warning("Disconnected, will reconnect...") + +############################################################################## + +# Run continuously. +run() + +############################################################################## diff -uNr a/logotron/eat.sh b/logotron/eat.sh --- a/logotron/eat.sh false +++ b/logotron/eat.sh e1f3e840c65ecaedda97fffe268490f1db0b2d2ed7541dbe00982cd49f01fad23b2d9ca9ba754550e6855051a28afde34e22bd4704a0e0f49abca66e9b58c0a3 @@ -0,0 +1 @@ +for f in phf/*.txt; do ./eat_dump.py $f trilema 2 ; done diff -uNr a/logotron/eat_dump.py b/logotron/eat_dump.py --- a/logotron/eat_dump.py false +++ b/logotron/eat_dump.py 5614d6523b1512656953c12732db5daa56b49288251b879427a9b8e33da7db95847e441d2ad007896182c5acb27f0ed808b072a25c12b4789cf85cc186e68f68 @@ -0,0 +1,104 @@ +#!/usr/bin/python + +############################################################################## +import psycopg2, psycopg2.extras +import psycopg2.extensions +psycopg2.extensions.register_type(psycopg2.extensions.UNICODE) +psycopg2.extensions.register_type(psycopg2.extensions.UNICODEARRAY) +import re +import time +import datetime +from datetime import datetime +import sys +import os + +# Debug Knob +DB_DEBUG = False +############################################################################## + +############################################################################## +db = psycopg2.connect("dbname=nsalog user=nsabot") ## CHANGE THESE + +def close_db(): + db.close() + +def exec_db(query, args=()): + cur = db.cursor(cursor_factory=psycopg2.extras.RealDictCursor) + if (DB_DEBUG): print "query: '{0}'".format(query) + if (DB_DEBUG): print "args: '{0}'".format(args) + if (DB_DEBUG): print "EXEC:" + cur.execute(query, args) + +def rollback_db(): + cur = db.cursor(cursor_factory=psycopg2.extras.RealDictCursor) + cur.execute("ROLLBACK") + db.commit() + +def commit_db(): + cur = db.cursor(cursor_factory=psycopg2.extras.RealDictCursor) + db.commit() + +############################################################################## + +# Eat individual line of a Phf-style log dump +def eat_logline(line, chan, era): + match = re.search("(\d+)\;(\d+)\;([^;]+)\;(.*$)", line) + if match: + g = match.groups() + self_speak = False + + try: + idx = int(g[0]) # Serial Number of Log Line + time = int(g[1]) # Unix Epochal Time of Log Line + except Exception, e: + print("Malformed Line! '" + line +"' ! 
: " + e) + close_db() + exit(1) + + speaker = g[2] # Name of Speaker + payload = g[3] # Payload (remainder of line) + + ## If spoken line is of form "* user ..." : + if speaker == "*": + spl = payload.split(' ', 1) + speaker = spl[0] + payload = spl[1] + self_speak = True + + ## Put in DB: + try: + exec_db('''insert into loglines (idx, t, chan, era, speaker, self, payload) + values (%s, %s, %s, %s, %s, %s, %s) ; ''', + [int(idx), datetime.fromtimestamp(time), str(chan), int(era), str(speaker), + bool(self_speak), str(payload)]) + commit_db() + except psycopg2.IntegrityError as e: + rollback_db() + print "Dupe Ignored, Idx=", idx + else: + print("Malformed Line! '" + line +"' !") + close_db() + exit(1) + + +# Eat Phf-style log dump at given path +def eat_dump(path, chan, era): + with open(path) as fp: + for line in fp: + eat_logline(line, chan, era) + + +############################################################################## + +if (len(sys.argv) == 4): + logdump = sys.argv[1] # Path to Phf-style log dump + chan = sys.argv[2] # Chan Name + era = sys.argv[3] # Era (integer) + # Eat: + eat_dump(logdump, chan, era) + close_db() +else: + print "Usage: ./eat_dump LOGFILE CHAN ERA" + exit(0) + +############################################################################## diff -uNr a/logotron/init_db.sh b/logotron/init_db.sh --- a/logotron/init_db.sh false +++ b/logotron/init_db.sh 63546f44db3fd7f6c48a1bee1439f2a67d3a3de6fef7eed536e8e7e95875ec53186e0383608afaf0635e4660d5dbeecaef9e620ee53369a8ea3aa148ae8ccbef @@ -0,0 +1,3 @@ +#!/bin/bash + +psql -U nsabot -d nsalog -a -f nsalog_schem.sql diff -uNr a/logotron/nsabot.conf b/logotron/nsabot.conf --- a/logotron/nsabot.conf false +++ b/logotron/nsabot.conf f447f004041538581919c4d373faa6e957d8ed67e273a93704b952802eeec970213983fb8fbe45e8a913b8831d5c1dbb605d8309df1f70559969b9b1b9b7685d @@ -0,0 +1,67 @@ +[bofh] + +# Path to IRC bot debuggism log +log = nsabot.log + +[irc] +servers = irc.freenode.net +port = 6667 + +# Bot's nick (change to yours, as with all knobs) +nick = snsabot + +# All chans for both www end and bot, go here: +chans = asciilifeform-test, asciilifeform-test-2 + +# IRC nick PW +pass = YOURFLEANODEPW + +# How long to wait for fleanode to ack auth of nick before joining chans +join_t = 20 + +# Verbose barf of irc tx/rx +irc_dbg = 0 + +[tcp] +bufsize = 4096 + +# Recv timeout +timeout = 30 + +# Delay between IRC transmits - possibly ought to be longer +t_delay = 0.1 + +[control] +# Command Trigger for IRC bot +prefix = !q + +[logotron] +# The current era. +era = 3 +# Convention for these : +# 1 : Age of #b-a (and earlier dark age material) +# 2 : Phf's (and several variously-reliable) loggers +# 3 : Present day. + +# Where the source lives (change to yours) +src_url = http://not.yet + +# From where index starts for new chan, so to leave room for archive insert +newchan_idx = 1000000 + +# Base URL of logtron site (change to yours!) 
+base_url = http://logs.nosuchlabs.com/ + +# Other people's bots (for colouration strictly) +bots = a111, deedbot, feedbot, auctionbot, lobbesbot + +# On what port will sit the www logtron +www_port = 5002 + +[db] +# Change to your DB (set it up so only answers locally) +db_name = nsalog +db_user = nsabot + +# Verbose barf of DB transactions +db_debug = 0 diff -uNr a/logotron/nsalog_schem.sql b/logotron/nsalog_schem.sql --- a/logotron/nsalog_schem.sql false +++ b/logotron/nsalog_schem.sql 2fc6a536ec6c147d53ee7a9ec50e947e4fb9f479bb108f23df3329ed83c3479a622ee618e41e721cb2fa8efac189d142743ab5ce3c3f4a6e90b851bd3dc9dcea @@ -0,0 +1,29 @@ +drop table if exists loglines; +create table loglines ( + ser serial, + idx integer not null, + t timestamp, + chan text not null, + era integer not null, + speaker text not null, + self boolean, + payload text not null, + backlinks integer[], + PRIMARY KEY(idx, chan), + UNIQUE(idx, chan) +); + + +create index logline_idx_i on loglines(idx); +create index logline_t_i on loglines(t); +create index logline_chan_i on loglines(chan); +create index logline_era_i on loglines(era); +create index logline_speaker_i on loglines(speaker); +create index logline_payload_i on loglines(payload); + +CREATE EXTENSION pg_trgm; + +-- drop index payload_search_idx; + +create index concurrently payload_search_idx +ON loglines USING gin (payload gin_trgm_ops); diff -uNr a/logotron/reader.py b/logotron/reader.py --- a/logotron/reader.py false +++ b/logotron/reader.py 5de963eb326e8f107264fb5d2dceaf715b8daff649353295ff19bfaa560946bd856f8970c69b1b6360fb003b7548fa78302423ecf83512a4bff43cfd3973f628 @@ -0,0 +1,441 @@ +#!/usr/bin/python + +############################################################################## +import ConfigParser, sys +import psycopg2, psycopg2.extras +import psycopg2.extensions +psycopg2.extensions.register_type(psycopg2.extensions.UNICODE) +psycopg2.extensions.register_type(psycopg2.extensions.UNICODEARRAY) +import time +import datetime +from datetime import timedelta +import sys +reload(sys) +sys.setdefaultencoding('utf8') +import os +import threading +import re +from datetime import datetime +from urlparse import urljoin +from flask import Flask, request, session, url_for, redirect, \ + render_template, abort, g, flash, _app_ctx_stack, make_response, \ + jsonify +from flask import Flask +from flask.ext.cache import Cache +############################################################################## + +############################################################################## +# Single mandatory arg: config file path +if len(sys.argv[1:]) != 1: + # If no args, print usage and exit: + print sys.argv[0] + " CONFIG" + exit(0) + +# Read Config from given conf file +config_path = os.path.abspath(sys.argv[1]) +cfg = ConfigParser.ConfigParser() +cfg.readfp(open(config_path)) + +try: + # IRCism: + Nick = cfg.get("irc", "nick") + Channels = [x.strip() for x in cfg.get("irc", "chans").split(',')] + Bots = [x.strip() for x in cfg.get("logotron", "bots").split(',')] + Bots.append(Nick) # Add our own bot to the bot list + # DBism: + DB_Name = cfg.get("db", "db_name") + DB_User = cfg.get("db", "db_user") + DB_DEBUG = cfg.get("db", "db_debug") + # Logism: + Base_URL = cfg.get("logotron", "base_url") + Era = int(cfg.get("logotron", "era")) + # WWW: + WWW_Port = int(cfg.get("logotron", "www_port")) + +except Exception as e: + print "Invalid config: ", e + exit(1) + +############################################################################## + 
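+# Note (illustrative, not a knob): reader.py shares nsabot.conf with bot.py;
+# with the stock values, the proxying WWW server forwards requests for
+# base_url (http://logs.nosuchlabs.com/) to this Flask app on www_port (5002).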
+############################################################################## +### Knobs not made into config yet ### +Default_Chan = Channels[0] +Min_Query_Length = 3 +Max_Search_Results = 1000 + +## Format for Date in Log Lines +Date_Short_Format = "%Y-%m-%d" + +## WWW Debug Knob +DEBUG = False +############################################################################## + +app = Flask(__name__) +cache = Cache(app,config={'CACHE_TYPE': 'simple'}) +app.config.from_object(__name__) + +def get_db(): + db = getattr(g, 'db', None) + if db is None: + db = g.db = psycopg2.connect("dbname=%s user=%s" % (DB_Name, DB_User)) + return db + +def close_db(): + if hasattr(g, 'db'): + g.db.close() + +@app.before_request +def before_request(): + g.db = get_db() + +@app.teardown_request +def teardown_request(exception): + close_db() + +def query_db(query, args=(), one=False): + cur = get_db().cursor(cursor_factory=psycopg2.extras.RealDictCursor) + if (DB_DEBUG): print "query: '{0}'".format(query) + cur.execute(query, args) + rv = cur.fetchone() if one else cur.fetchall() + if (DB_DEBUG): print "query res: '{0}'".format(rv) + return rv + +def exec_db(query, args=()): + cur = get_db().cursor(cursor_factory=psycopg2.extras.RealDictCursor) + if (DB_DEBUG): print "query: '{0}'".format(query) + if (DB_DEBUG): print "args: '{0}'".format(args) + if (DB_DEBUG): print "EXEC:" + cur.execute(query, args) + +def getlast_db(): + cur = get_db().cursor(cursor_factory=psycopg2.extras.RealDictCursor) + cur.execute('select lastval()') + return cur.fetchone()['lastval'] + +def commit_db(): + cur = get_db().cursor(cursor_factory=psycopg2.extras.RealDictCursor) + g.db.commit() + +############################################################################## + +## All eggogs redirect to main page +@app.errorhandler(404) +def page_not_found(error): + return redirect(url_for('log')) + +############################################################################## + +html_escape_table = { + "&": "&", + '"': """, + "'": "'", + ">": ">", + "<": "<", +} + +def html_escape(text): + return "".join(html_escape_table.get(c,c) for c in text) + +############################################################################## + +## Get base URL +def get_base(): + if DEBUG: + return request.host_url + return Base_URL + + +# Get perma-URL corresponding to given log line +def line_url(l): + return "{0}log/{1}/{2}#{3}".format(get_base(), + l['chan'], + l['t'].strftime(Date_Short_Format), + l['idx']) + +def gen_chanlist(selected_chan): + # Get current time + now = datetime.now() + + s = """""" + for chan in Channels: + chan_formed = chan + if chan == selected_chan: + chan_formed = "" + chan + "" + s += """""".format( + get_base(), chan, chan_formed) + s += "" + + for chan in Channels: + + last_time = query_db( + '''select t, idx from loglines where chan=%s + and idx = (select max(idx) from loglines where chan=%s) ;''', + [chan, chan], one=True) + + last_time_txt = "" + if last_time != None: + span = (now - last_time['t']) + days = span.days + hours = span.seconds/3600 + minutes = (span.seconds%3600)/60 + + if days != 0: + last_time_txt += '%dd ' % days + if hours != 0: + last_time_txt += '%dh ' % hours + if minutes != 0: + last_time_txt += '%dm' % minutes + + s += """""".format( + get_base(), + chan, + last_time['t'].strftime(Date_Short_Format), + last_time['idx'], + last_time_txt) + + else: + last_time_txt = "" + s += "" + + s += "
{2}
{4}
" + return s + + +# Make above callable from inside htm templater: +app.jinja_env.globals.update(gen_chanlist=gen_chanlist) + + +# HTML Tag Regex +tag_regex = re.compile("(<[^>]+>)") + + +# Find the segments of a block of text which constitute HTML tags +def get_link_intervals(str): + links = [] + span = [] + for match in tag_regex.finditer(str): + span = match.span() + links += [span] + return links + + +# Highlight all matched tokens in given text +def highlight_matches(strings, text): + e = '(' + ('|'.join(strings)) + ')' + return re.sub(e, + r"""\1""", + text, + flags=re.I) + + +# Highlight matched tokens in the display of a search result logline, +# but leave HTML tags alone +def highlight_text(strings, text): + result = "" + last = 0 + for i in get_link_intervals(text): + i_start, i_end = i + result += highlight_matches(strings, text[last:i_start]) + result += text[i_start:i_end] # the HTML tag, leave it alone + last = i_end + result += highlight_matches(strings, text[last:]) # last block + return result + + +# Regexps used in format_logline: +boxlinks_re = re.compile('\[\s*[^ <]+\s*\]\[([^\[\]]+)\]') +stdlinks_re = re.compile('(http[^ \[\]]+)') + + +## Format given log line for display +def format_logline(l, highlights = []): + payload = html_escape(l['payload']) + + # Format ordinary links: + payload = re.sub(stdlinks_re, r'\1', payload) + + # Now also format [link][text] links : + payload = re.sub(boxlinks_re, r'\2', payload) + + # If this is a search result, illuminate the matched strings: + if highlights != []: + payload = highlight_text(highlights, payload) + + bot = "" + if l['speaker'] in Bots: + bot = " bot" + + # HTMLize the given line : + s = ("
" + "{1}: {4}
").format(l['idx'], + l['speaker'], + l['t'], + line_url(l), + payload, + bot) + + return s + +# Make above callable from inside htm templater: +app.jinja_env.globals.update(format_logline=format_logline) + + +# Generate navbar for the given date: +def generate_navbar(date, tail, chan): + cur_day = datetime.strptime(date, Date_Short_Format) + prev_day = cur_day - timedelta(days=1) + prev_day_txt = prev_day.strftime(Date_Short_Format) + + s = "← {2}".format( + get_base(), + chan, + prev_day_txt) + + if not tail: + next_day = cur_day + timedelta(days=1) + next_day_txt = next_day.strftime(Date_Short_Format) + s = s + " | {2} →".format( + get_base(), + chan, + next_day_txt) + + return s + +# Make above callable from inside htm templater: +app.jinja_env.globals.update(generate_navbar=generate_navbar) + + +@app.route('/log//') +@app.route('/log/', defaults={'date': None}) +@app.route('/log/', defaults={'chan': Default_Chan, 'date': None}) +@app.route('/log', defaults={'chan': Default_Chan, 'date': None}) +def log(chan, date): + # Handle rubbish chan: + if chan not in Channels: + return redirect(url_for('log')) + + # Get current time + now = datetime.now() + + # Whether we are viewing 'current' tail + tail = False + + # If viewing 'current' log: + if date == None: + date = now.strftime(Date_Short_Format) + tail = True + + # Parse given date, and redirect to default log if rubbish: + try: + day_start = datetime.strptime(date, Date_Short_Format) + except Exception, e: + return redirect(url_for('log')) + + # Determine the end of the interval being shown + day_end = day_start + timedelta(days=1) + + # Get the loglines from DB + lines = query_db( + '''select * from loglines where chan=%s + and t between %s and %s order by idx asc;''', + [chan, day_start, day_end], one=False) + + # Return the HTMLized text + return render_template('log.html', + chan = chan, + loglines = lines, + date = date, + tail = tail) + + + +Name_Chars = "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_-" + +def sanitize_speaker(s): + return "".join([ch for ch in s if ch in Name_Chars]) + + +def re_escape(s): + return re.sub(r"[(){}\[\].*?|^$\\+-]", r"\\\g<0>", s) + +# Search knob. Supports 'chan' parameter. +@app.route('/log-search') +def logsearch(): + # The query params: + chan = request.args.get('chan', default = Default_Chan, type = str) + query = request.args.get('q', default = '', type = str) + # page_num = request.args.get('page', default = 0, type = int) + + # Handle rubbish chan: + if chan not in Channels: + return redirect(url_for('log')) + + nres = 0 + searchres = [] + tokens_orig = [] + search_head = "Query is too short!" 
+ # Forbid query that is too short: + if len(query) >= Min_Query_Length: + # Get the search tokens to use: + tokens = query.split() + tokens_standard = [] + from_users = [] + + # separate out "from:foo" tokens and ordinary: + for t in tokens: + if t.startswith("from:") or t.startswith("f:"): + from_users.append(t.split(':')[1]) # Record user for 'from' query + else: + tokens_standard.append(t) + + from_users = ['%' + sanitize_speaker(t) + '%' for t in from_users] + tokens_orig = [re_escape(t) for t in tokens_standard] + tokens_formed = ['%' + t + '%' for t in tokens_orig] + + # Query is usable; perform the search on DB and get the finds + if from_users == []: + searchres = query_db( + '''select * from loglines where chan=%s + and payload ilike all(%s) order by idx desc limit %s;''', + [chan, + tokens_formed, + Max_Search_Results], one=False) + else: + print "from=", from_users + + searchres = query_db( + '''select * from loglines where chan=%s + and speaker ilike any(%s) + and payload ilike all(%s) order by idx desc limit %s;''', + [chan, + from_users, + tokens_formed, + Max_Search_Results], one=False) + + + # Number of entries found + nres = len(searchres) + search_head = "{0} entries found in {1} for '{2}' :".format( + nres, chan, html_escape(query)) + + # No paging support just yet: + return render_template('searchres.html', + query = query, + nres = nres, + chan = chan, + search_head = search_head, + tokens = tokens_orig, + loglines = searchres) + + +# Comment this out if you don't have one +@app.route('/favicon.ico') +def favicon(): + return redirect(url_for('static', filename='favicon.ico')) + + +## App Mode +if __name__ == '__main__': + app.run(threaded=True, port=WWW_Port) diff -uNr a/logotron/static/README b/logotron/static/README --- a/logotron/static/README false +++ b/logotron/static/README d89bc958b029448a46d627da724957403b8db562b3a8756236142341a5a998212440f9e5a5d5679633d89f19c5f07217195165e4c7931ce57697bb678bea6fe4 @@ -0,0 +1 @@ +favicon.ico goes in this dir. diff -uNr a/logotron/templates/layout.html b/logotron/templates/layout.html --- a/logotron/templates/layout.html false +++ b/logotron/templates/layout.html 587ccf877ba1bff6dfa4547ebfed8fae1b071cf243fa4bd77f56f91c0a1b6a692137ff9dc5b458c34783ac632bcb83abac664073ab82d79649709810f4cf9c0a @@ -0,0 +1,127 @@ + + + + + {% block title %} + {% endblock %} + + + + + + + + +

+ + + + + + + + +
+ + No Such lAbs + + + {{ gen_chanlist( chan ) | safe }} + + + Pizarro + +
+ +

+ +
+ + + +
+ + {% block body %}{% endblock %} + + + + diff -uNr a/logotron/templates/log.html b/logotron/templates/log.html --- a/logotron/templates/log.html false +++ b/logotron/templates/log.html 44d51aaef738815d01d92cbf6a1d0d44803627579352dbaafe0257346e5fdff9d76ec64fd00ebb435e177869502f97c1b72c7322ee6108c6784011d222966ba7 @@ -0,0 +1,17 @@ +{% extends "layout.html" %} + +{% block title %} +#{{ chan }} | {{ date }} +{% endblock %} + +{% block body %} + + + +{% for l in loglines %} +{{ format_logline(l) | safe }} +{% endfor %} + + + +{% endblock %} diff -uNr a/logotron/templates/searchres.html b/logotron/templates/searchres.html --- a/logotron/templates/searchres.html false +++ b/logotron/templates/searchres.html a60ca105a579ed2b256dbfc92a7ca7468d17ba875a3215217bb1bb2ea15ac04e9e56c66205b1a4387263b068c39f9d72f08e76edce2b371f4f1aebe24bbf7f03 @@ -0,0 +1,17 @@ +{% extends "layout.html" %} + +{% block title %} +{{ nres }} Results for {{ query }} in #{{ chan }} +{% endblock %} + +{% block body %} + +
{{ search_head | safe }}
+ +
+ +{% for l in loglines %} +{{ format_logline(l, tokens) | safe }} +{% endfor %} + +{% endblock %}
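
The following standalone sketch (illustrative only, not part of the patch)
shows how the bot's log-reference echo ties the two programs together:
reader.py's line_url() emits permalinks of the form
{base_url}log/{chan}/{date}#{idx}, and when such a URL is pasted into a
channel, bot.py's logref_re recovers the (chan, date, idx) triple and the
stored line is echoed. Assuming the stock base_url from nsabot.conf:

import re

base_url = "http://logs.nosuchlabs.com/"     # as in nsabot.conf
logref_re = re.compile(base_url + """log\/([^/]+)/([^/]+)#(\d+)""")

# A hypothetical channel line containing a pasted permalink:
text = "see http://logs.nosuchlabs.com/log/trilema/2019-08-02#1926177 for context"
for chan, date, idx in re.findall(logref_re, text):
    print chan, date, idx                    # trilema 2019-08-02 1926177

(bot.py then looks up chan and idx in 'loglines' and speaks the matching
payload into the channel where the reference was seen.)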