#!/usr/bin/ruby # # youtube-scanner.rb xuzo@cuarentaydos.com # # v 0.0.1 Initial version. # Gaim, Gajim, Firefox and ClawsMail scanners. # Youtube URL detection. # Cache and error cache support. # # vim: ts=2 # require 'sqlite3' # GajimScanner require 'net/http' # HTTP library # # Cache class for store results # class Cache def initialize(file) @data = {} @file = file # Read cache if FileTest.exists?(@file) and FileTest.file?(@file) f = File.new(@file,"r") f.each { |line| url, title = line.chomp.split(/\s+/, 2) @data[url] = title } f.close() end end def add(url,title = nil) if title != nil title.chomp! end @data[url] = title end def del(url) @data.delete([url]) end def get(url) return @data[url] end def flush f = File.new(@file, 'w+') @data.each do |url,title| if not url.nil? and not url.empty? f.write("#{url} #{title}\n") end end f.close() end end class Url attr_accessor :url, :provider, :data def initialize(url, provider = nil, data = {}) @url = url @provider = provider @data = data end def to_s return @url end def <=>(other_url) return @url <=> other_url.url end def ==(other_url) return @url == other_url.url end def ===(other_url) return @url == other_url.url end end # # Base class for backends # class Scanner def scan(location) ret = [] lineno = 1 File.new(location, "r").each do |line| if (url = self.is_youtube?(line)) != nil ret.push(Url.new(url,'Scanner', {:file => location, :line => lineno})) end lineno = lineno + 1 end return ret.uniq end def is_youtube?(string) start_url = string.index('youtube.com/watch?') if start_url.nil? return nil end string = string[start_url..-1] # Cortar por espacio end_url = string.index(' ', start_url) if not end_url.nil? string[0..end_url-1] end # Buscar el parametro v en la url v_param = string.index(/v=[A-Za-z0-9_\-\=]{11}/) if not v_param.nil? return "http://youtube.com/watch?" + string[v_param..v_param+12] end end def ls_R(location) ret = { :directories => [], :files => [] } if not FileTest.exists?(location) return [] end if FileTest.file?(location) ret[:files].push(location) elsif FileTest.directory?(location) entries = Dir.entries(location) entries.slice!(0, 2) # Skip '.' and '..' entries.each do |e| fullpath = location + '/' + e if FileTest.directory?(fullpath) ret[:directories].push(fullpath) subs = self.ls_R(fullpath) ret[:directories] += subs[:directories] ret[:files] += subs[:files] elsif FileTest.file?(fullpath) ret[:files].push(fullpath) end end return ret end end end # # Backends # class GaimScanner < Scanner def scan(location = ENV['HOME'] + '/.gaim') location += '/logs' if not FileTest.directory?(location) return [] end ret = [] entries = self.ls_R(location) entries[:files].delete_if do |e| e.index(".txt", -4) == nil end entries[:files].each do |file| ret += super(file) end ret.each { |url| url.provider = self.class } return ret end end class GajimScanner < Scanner def scan(location = ENV['HOME'] + '/.gajim') location += '/logs.db' if not FileTest.file?(location) return [] end ret = [] db = SQLite3::Database.new(location) res = db.query("SELECT log_line_id,message FROM logs WHERE message LIKE '%youtube.com/watch%'") res.each do |row| if (url = self.is_youtube?(row[1])) != nil ret.push(Url.new(url, 'GajimScanner', { :file => location, :id => row[0] })) end end return ret end end class ClawsMailScanner < Scanner def scan(location = ENV['HOME'] + '/.sylpheed-claws' ) location += '/imapcache' ret = [] paths = self.ls_R(location) paths[:files].each do |path| ret += super(path) end ret.each do |url| url.provider = self.class end return ret end end class FirefoxScanner < Scanner def scan(location = ENV['HOME'] + '/.mozilla/firefox') ret = [] Dir.foreach(location) do |profile| if /^[A-Za-z0-9]{8}\.default$/.match(profile) \ and FileTest.exists?(location + '/' + profile + '/history.dat') ret += super(location + '/' + profile + '/history.dat') end end ret.each do |url| url.provider = self.class end end end # # HACK: Uniq over urls # def custom_uniq(urls) tmp = {} urls.each do |url| tmp[url.url] = url end ret = [] tmp.each_value do |obj| ret.push(obj) end return ret end if __FILE__ == $0 urls = [] urls += GajimScanner.new.scan() urls += ClawsMailScanner.new.scan() urls += FirefoxScanner.new.scan() urls += GaimScanner.new.scan() cache = Cache.new(ENV['HOME'] + '/.video-cache') error_cache = Cache.new(ENV['HOME'] + '/.video-cache.error') conn = Net::HTTP.new('www.youtube.com', 80) urls = custom_uniq(urls) urls.each do |url| in_cache = true title = cache.get(url.url) if title.nil? if error_cache.get(url.url) $stderr.puts "[E] URL is blacklisted in error cache #{url}" next end path_idx = url.url.index('/watch?') if path_idx.nil? $stderr.puts "[E] Cannot find /watch? in [#{url}]" # puts "[D] #{url.inspect}" next end path = url.url[path_idx..-1] resp, data = conn.get(path) if resp.code != "200" $stderr.puts "[E] Server sais: #{resp.code}: #{resp.message} for #{url}" error_cache.add(url.url, nil) # puts "[D] #{url.inspect}" next end start_pos = data.index('