Improved highlighted code caching and performance

- Pygments caching is broken into a couple of utility functions for greater flexibility.
- Gist cache files are now hashed against their gist id and parameters.
- Raw gists are no longer cached, only processed gists.
- Gist caches are still stored in .gist-cache/ for more atomic cleanup.

Note: For the docs, caching speeds up generation by about 1800 milliseconds on my machine.
2024-11-16 06:05:00 -05:00 · 2012-12-24 13:51:05 -06:00 · 2012-12-24 13:51:05 -06:00 · 66a883f2af
commit 66a883f2af
parent ed186b18a2
2 changed files with 67 additions and 57 deletions
--- a/plugins/gist_tag.rb
+++ b/plugins/gist_tag.rb
@ -9,6 +9,7 @@
 require 'cgi'
 require 'digest/md5'
 require 'net/https'
+require 'fileutils'
 require 'uri'
 require './plugins/pygments_code'

@ -20,16 +21,19 @@ module Jekyll
      @cache_disabled = false
      @cache_folder   = File.expand_path "../.gist-cache", File.dirname(__FILE__)

-      options    = parse_markup(markup)
-      @lang      = options[:lang]
-      @title     = options[:title]
-      @lineos    = options[:lineos]
-      @marks     = options[:marks]
-      @url       = options[:url]
-      @link_text = options[:link_text]
-      @start     = options[:start]
-      @end       = options[:end]
-      @markup    = clean_markup(markup)
+      opts = parse_markup(markup)
+      @markup = clean_markup(markup)
+
+      @options = {
+        lang:      opts[:lang],
+        title:     opts[:title],
+        lineos:    opts[:lineos],
+        marks:     opts[:marks],
+        url:       opts[:url],
+        link_text: opts[:link_text],
+        start:     opts[:start],
+        end:       opts[:end]
+      }

      FileUtils.mkdir_p @cache_folder
    end
@ -37,23 +41,31 @@ module Jekyll
    def render(context)
      if parts = @markup.match(/([\d]*) (.*)/)
        gist, file = parts[1].strip, parts[2].strip
-        code       = get_cached_gist(gist, file) || get_gist_from_web(gist, file)

-        length = code.lines.count
-        @start ||= 1
-        @end   ||= length
-        return "#{file} is #{length} lines long, cannot begin at line #{@start}" if @start > length
-        return "#{file} is #{length} lines long, cannot read beyond line #{@end}" if @end > length
-        if @start > 1 or @end < length
-          code = code.split(/\n/).slice(@start -1, @end + 1 - @start).join("\n")
+        @options[:title]     ||= file.empty? ? "Gist: #{gist}" : file 
+        @options[:url]       ||= "https://gist.github.com/#{gist}"
+        @options[:lang]      ||= file.empty? ? @options[:lang] || '' : file.split('.')[-1]
+        @options[:link_text] ||= "Gist page"
+        @options[:no_cache]    = @cache_disabled
+        @options[:cache_path]  = @cache_disabled ? nil : get_cache_path(@cache_folder, get_cache_file(gist, file), @markup + @options.to_s)
+
+        cache = read_cache(@options[:cache_path])
+
+        unless cache
+          code     = get_gist_from_web(gist, file)
+          length   = code.lines.count
+          @start ||= 1
+          @end   ||= length
+          return "#{file} is #{length} lines long, cannot begin at line #{@start}" if @start > length
+          return "#{file} is #{length} lines long, cannot read beyond line #{@end}" if @end > length
+          if @start > 1 or @end < length
+            code = code.split(/\n/).slice(@start -1, @end + 1 - @start).join("\n")
+          end
+          code = highlight(code, @options[:lang], @options)
        end
-
-        lang  = file.empty? ? @lang || '' : file.split('.')[-1]
-        link  = "https://gist.github.com/#{gist}"
-        title = file.empty? ? "Gist: #{gist}" : file
-        highlight(code, lang, { title: @title || title, url: link, link_text: @link_text || 'Gist page', marks: @marks, linenos: @linenos, start: @start })
+        code
      else
-        ""
+        "Gist formatting error, format should be {% gist gist_id [filename] %}"
      end
    end

@ -61,25 +73,13 @@ module Jekyll
      "https://raw.github.com/gist/#{gist}/#{file}"
    end

-    def cache(gist, file, data)
-      cache_file = get_cache_file_for gist, file
-      File.open(cache_file, "w") do |io|
-        io.write data
-      end
-    end
-
-    def get_cached_gist(gist, file)
-      return nil if @cache_disabled
-      cache_file = get_cache_file_for gist, file
-      File.read cache_file if File.exist? cache_file
-    end
-
-    def get_cache_file_for(gist, file)
+    def get_cache_file(gist, file)
      bad_chars = /[^a-zA-Z0-9\-_.]/
      gist      = gist.gsub bad_chars, ''
      file      = file.gsub bad_chars, ''
-      md5       = Digest::MD5.hexdigest "#{gist}-#{file}"
-      File.join @cache_folder, "#{gist}-#{file}-#{md5}.cache"
+      name  = gist
+      name += "-#{file}" unless file.empty?
+      name
    end

    def get_gist_from_web(gist, file)
@ -96,10 +96,7 @@ module Jekyll
      https.verify_mode = OpenSSL::SSL::VERIFY_NONE
      request           = Net::HTTP::Get.new raw_uri.request_uri
      data              = https.request request
-      code              = data.body.to_s
-
-      cache gist, file, code unless @cache_disabled
-      code
+      data.body.to_s
    end
  end

--- a/plugins/pygments_code.rb
+++ b/plugins/pygments_code.rb
@ -32,19 +32,24 @@ module HighlightCode
    lang = 'csharp' if lang == 'cs'
    lang = 'plain' if lang == '' or lang.nil? or !lang

-    url       = options[:url]       || nil
-    title     = options[:title]     || (url ? ' ' : nil)
-    link_text = options[:link_text] || nil
-    wrap      = options[:wrap]      || true
-    marks     = options[:marks]
-    linenos   = options[:linenos]
-    start     = options[:start]     || 1
+    url        = options[:url]        || nil
+    title      = options[:title]      || (url ? ' ' : nil)
+    link_text  = options[:link_text]  || nil
+    wrap       = options[:wrap]       || true
+    marks      = options[:marks]
+    linenos    = options[:linenos]
+    start      = options[:start]      || 1
+    no_cache   = options[:no_cache]   || false
+    cache_path = options[:cache_path] || nil

-    path = File.join(PYGMENTS_CACHE_DIR, "#{lang}-#{Digest::MD5.hexdigest(options.to_s + code)}.html") if defined?(PYGMENTS_CACHE_DIR)
+    # Attempt to retrieve cached code
+    cache = nil
+    unless no_cache
+      path  = cache_path || get_cache_path(PYGMENTS_CACHE_DIR, lang, options.to_s + code)
+      cache = read_cache(path)
+    end

-    if File.exist?(path)
-      code = File.read(path)
-    else
+    unless cache
     if lang == 'plain'
        # Escape html tags
        code = code.gsub('<','&lt;')
@ -54,12 +59,20 @@ module HighlightCode
      code = tableize_code(code, lang, {linenos: linenos, start: start, marks: marks })
      title = captionize(title, url, link_text) if title
      code = "<figure class='code'>#{title}#{code}</figure>"
-      File.open(path, 'w') {|f| f.print(code) } if path
+      File.open(path, 'w') {|f| f.print(code) } unless no_cache
    end
-    code = safe_wrap(code) if wrap
+    code = safe_wrap(cache || code) if wrap
    code
  end

+  def read_cache (path)
+    code = File.exist?(path) ? File.read(path) : nil
+  end
+
+  def get_cache_path (dir, name, str)
+    File.join(dir, "#{name}-#{Digest::MD5.hexdigest(str)}.html")
+  end
+
  def captionize (caption, url, link_text)
    figcaption  = "<figcaption>#{caption}"
    figcaption += "<a href='#{url}'>#{(link_text || 'link').strip}</a>" if url