Improved highlighted code caching and performance

- Pygments caching is now split into a couple of utility functions for greater flexibility.
- Gist cache files are now hashed against their gist ID and parameters.
- Raw gists are no longer cached, only processed gists.
- Gist caches are still stored in .gist-cache/ for more atomic cleanup.

Note: For the docs, caching speeds up generation by about 1800 milliseconds on my machine.
This commit is contained in:
Brandon Mathis 2012-12-24 13:51:05 -06:00
parent ed186b18a2
commit 66a883f2af
2 changed files with 67 additions and 57 deletions

View File

@ -9,6 +9,7 @@
require 'cgi'
require 'digest/md5'
require 'net/https'
require 'fileutils'
require 'uri'
require './plugins/pygments_code'
@ -20,16 +21,19 @@ module Jekyll
@cache_disabled = false
@cache_folder = File.expand_path "../.gist-cache", File.dirname(__FILE__)
options = parse_markup(markup)
@lang = options[:lang]
@title = options[:title]
@lineos = options[:lineos]
@marks = options[:marks]
@url = options[:url]
@link_text = options[:link_text]
@start = options[:start]
@end = options[:end]
@markup = clean_markup(markup)
opts = parse_markup(markup)
@markup = clean_markup(markup)
@options = {
lang: opts[:lang],
title: opts[:title],
lineos: opts[:lineos],
marks: opts[:marks],
url: opts[:url],
link_text: opts[:link_text],
start: opts[:start],
end: opts[:end]
}
FileUtils.mkdir_p @cache_folder
end
@ -37,23 +41,31 @@ module Jekyll
# Render the gist tag. @markup is matched against "<digits> <rest>" (gist id,
# a space, then an optional file name); on a match the gist source is fetched,
# optionally trimmed to a line range and highlighted, otherwise a fallback
# string is produced.
# NOTE(review): this span appears to interleave the removed and the added lines
# of a diff hunk without +/- markers (the if/end pairs do not balance and the
# else branch holds two consecutive string values), so it is not runnable as
# shown. The comments below label the two variants; confirm against the real file.
def render(context)
if parts = @markup.match(/([\d]*) (.*)/)
gist, file = parts[1].strip, parts[2].strip
# Variant A (raw-gist cache): use the cached raw gist if there is one, else download it.
code = get_cached_gist(gist, file) || get_gist_from_web(gist, file)
length = code.lines.count
# Default the line range to the whole file, then bail out with a message when a bound exceeds the file length.
@start ||= 1
@end ||= length
return "#{file} is #{length} lines long, cannot begin at line #{@start}" if @start > length
return "#{file} is #{length} lines long, cannot read beyond line #{@end}" if @end > length
if @start > 1 or @end < length
# Keep only lines @start..@end (1-based, inclusive).
code = code.split(/\n/).slice(@start -1, @end + 1 - @start).join("\n")
# Variant B (processed-gist cache): fill in display defaults, keeping values already present in @options.
@options[:title] ||= file.empty? ? "Gist: #{gist}" : file
@options[:url] ||= "https://gist.github.com/#{gist}"
# When no language is set: '' for an empty file name, otherwise the text after the file name's last '.'.
@options[:lang] ||= file.empty? ? @options[:lang] || '' : file.split('.')[-1]
@options[:link_text] ||= "Gist page"
@options[:no_cache] = @cache_disabled
# Cache path combines the gist/file name with a digest of the markup and options; nil when caching is disabled.
@options[:cache_path] = @cache_disabled ? nil : get_cache_path(@cache_folder, get_cache_file(gist, file), @markup + @options.to_s)
# NOTE(review): read_cache receives nil here when caching is disabled; confirm it tolerates a nil path.
cache = read_cache(@options[:cache_path])
# Cache miss: download the gist, trim it to the requested range and highlight it.
unless cache
code = get_gist_from_web(gist, file)
length = code.lines.count
# NOTE(review): the visible @options-based initialize lines store the range under
# @options[:start] / @options[:end] and do not assign @start / @end, so these
# defaults may always apply; verify.
@start ||= 1
@end ||= length
return "#{file} is #{length} lines long, cannot begin at line #{@start}" if @start > length
return "#{file} is #{length} lines long, cannot read beyond line #{@end}" if @end > length
if @start > 1 or @end < length
code = code.split(/\n/).slice(@start -1, @end + 1 - @start).join("\n")
end
code = highlight(code, @options[:lang], @options)
end
# Variant A tail: derive lang/link/title locally and highlight with an explicit options hash.
lang = file.empty? ? @lang || '' : file.split('.')[-1]
link = "https://gist.github.com/#{gist}"
title = file.empty? ? "Gist: #{gist}" : file
highlight(code, lang, { title: @title || title, url: link, link_text: @link_text || 'Gist page', marks: @marks, linenos: @linenos, start: @start })
# NOTE(review): in Variant B `code` is only assigned inside the cache-miss branch,
# so on a cache hit this would evaluate to nil rather than the cached HTML; confirm.
code
else
# Fallback when @markup does not match; the empty string looks like the old value, the message the new one.
""
"Gist formatting error, format should be {% gist gist_id [filename] %}"
end
end
@ -61,25 +73,13 @@ module Jekyll
"https://raw.github.com/gist/#{gist}/#{file}"
end
# Write data to the cache file belonging to the given gist id and file name.
# The destination path is whatever get_cache_file_for returns for that pair;
# an existing file at that path is overwritten.
def cache(gist, file, data)
  destination = get_cache_file_for(gist, file)
  File.write(destination, data)
end
# Look up the cached copy of a gist file.
# Returns the cache file's contents, or nil when caching is disabled
# (@cache_disabled is truthy) or no cache file exists for this gist/file pair.
def get_cached_gist(gist, file)
  unless @cache_disabled
    cached_path = get_cache_file_for(gist, file)
    File.exist?(cached_path) ? File.read(cached_path) : nil
  end
end
# NOTE(review): two `def` lines share a single `end` here. This looks like the
# before/after of a rename (get_cache_file_for -> get_cache_file) shown without
# diff markers, so the span is not runnable as written; confirm against the
# real file which lines belong to which method.
def get_cache_file_for(gist, file)
def get_cache_file(gist, file)
# Strip every character other than letters, digits, '-', '_' and '.' from both parts.
bad_chars = /[^a-zA-Z0-9\-_.]/
gist = gist.gsub bad_chars, ''
file = file.gsub bad_chars, ''
# Variant using md5: a full path under @cache_folder of the form "<gist>-<file>-<md5>.cache".
md5 = Digest::MD5.hexdigest "#{gist}-#{file}"
File.join @cache_folder, "#{gist}-#{file}-#{md5}.cache"
# Variant using name: a bare name, "<gist>" or "<gist>-<file>" when file is non-empty (no directory, no extension).
name = gist
name += "-#{file}" unless file.empty?
name
end
def get_gist_from_web(gist, file)
@ -96,10 +96,7 @@ module Jekyll
https.verify_mode = OpenSSL::SSL::VERIFY_NONE
request = Net::HTTP::Get.new raw_uri.request_uri
data = https.request request
code = data.body.to_s
cache gist, file, code unless @cache_disabled
code
data.body.to_s
end
end

View File

@ -32,19 +32,24 @@ module HighlightCode
lang = 'csharp' if lang == 'cs'
lang = 'plain' if lang == '' or lang.nil? or !lang
url = options[:url] || nil
title = options[:title] || (url ? ' ' : nil)
link_text = options[:link_text] || nil
wrap = options[:wrap] || true
marks = options[:marks]
linenos = options[:linenos]
start = options[:start] || 1
url = options[:url] || nil
title = options[:title] || (url ? ' ' : nil)
link_text = options[:link_text] || nil
wrap = options[:wrap] || true
marks = options[:marks]
linenos = options[:linenos]
start = options[:start] || 1
no_cache = options[:no_cache] || false
cache_path = options[:cache_path] || nil
path = File.join(PYGMENTS_CACHE_DIR, "#{lang}-#{Digest::MD5.hexdigest(options.to_s + code)}.html") if defined?(PYGMENTS_CACHE_DIR)
# Attempt to retrieve cached code
cache = nil
unless no_cache
path = cache_path || get_cache_path(PYGMENTS_CACHE_DIR, lang, options.to_s + code)
cache = read_cache(path)
end
if File.exist?(path)
code = File.read(path)
else
unless cache
if lang == 'plain'
# Escape html tags
code = code.gsub('<','&lt;')
@ -54,12 +59,20 @@ module HighlightCode
code = tableize_code(code, lang, {linenos: linenos, start: start, marks: marks })
title = captionize(title, url, link_text) if title
code = "<figure class='code'>#{title}#{code}</figure>"
File.open(path, 'w') {|f| f.print(code) } if path
File.open(path, 'w') {|f| f.print(code) } unless no_cache
end
code = safe_wrap(code) if wrap
code = safe_wrap(cache || code) if wrap
code
end
# Read a previously cached snippet from disk.
#
# path - String path of the cache file. May be nil: the gist tag's render
#        passes nil as the cache path when caching is disabled.
#
# Returns the file contents as a String, or nil when path is nil or no file
# exists at path.
def read_cache(path)
  # File.exist?(nil) raises TypeError, so a missing path is treated as a cache miss.
  return nil if path.nil?

  File.read(path) if File.exist?(path)
end
# Build the path of a cache entry inside dir.
# The file name is "<name>-<md5 hex digest of str>.html".
def get_cache_path (dir, name, str)
  digest = Digest::MD5.hexdigest(str)
  entry = "#{name}-" + digest + ".html"
  File.join(dir, entry)
end
def captionize (caption, url, link_text)
figcaption = "<figcaption>#{caption}"
figcaption += "<a href='#{url}'>#{(link_text || 'link').strip}</a>" if url