Website - Metadata Reader

Menu

Introduction

These functions complement Org Mode publishing features by adding functions to read and manipulate metadata from Org Mode files.

The metadata can then be used to build blog index pages, sitemaps, etc, using the literate programming features Org Mode provides.

The function you want to use is files-metadata, which returns a list of association lists. Each association list collects the metadata of one file and has the form:

(("FILENAME" . "./admin/switching-to-nginx.org")
 ("BASENAME" . "switching-to-nginx.org")
 ("RELATIVE" . "admin/switching-to-nginx.org")
 ("ABSOLUTE" . "switching-to-nginx.org")
 ("IS_POST")
 ("CATEGORY_FROM_FILE" . "admin")
 ("RELATIVE_URL" . "admin/switching-to-nginx.html")
 ("ABSOLUTE_URL" . "/switching-to-nginx.html")
 ("DATE_ISO8601" . "2014-04-12T100000+0200")
 ("TITLE" . "Switching to NGINX")
 ("AUTHOR" . "Adolfo Villafiorita")
 ("DESCRIPTION" . "Whatever you write in the description property")
 ("DATE" . "<2014-04-12 Sat>")
 ("KEYWORDS" . "webdev, admin"))
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;; THE EMACS-LISP FILE IS TANGLED FROM THE CORRESPONDING .org FILE
;;; !!! DO NOT EDIT DIRECTLY !!!
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

Variables

Directory with posts and pathname separator

(defvar posts-dir "notes"
  "Name of the directory, relative to the site root, that holds the posts.")

(defvar dir-sep "/"
  "Separator between the components of a pathname.")

List Iterators and Selectors

Replicate in Emacs Lisp some list utilities available in Ruby. Various libraries implement similar functions, among them dash and seq; we stick to seq.

(defun flatten (lst)
  "Return a flat list of the non-list elements of LST, in their original order.

Nested lists are traversed recursively.  Because nil is itself a list,
nil elements and empty sublists do not appear in the result.

This relies on the built-in `flatten-tree' rather than on the `labels'
macro, which belongs to the obsolete `cl' library."
  (flatten-tree lst))

(defun compact (list)
  "Return a copy of LIST with every nil element removed."
  (seq-remove #'null list))

(defun select (predicate list)
  "Return the elements of LIST for which PREDICATE returns non-nil."
  (seq-remove (lambda (item) (not (funcall predicate item)))
              list))

(defun uniq (list)
  "Return a copy of LIST without duplicates, keeping first occurrences in order.

Elements are compared with `equal'.  LIST itself is not modified: the
duplicates are deleted from a copy.  No recursion is involved, so the
length of LIST is not limited by `max-lisp-eval-depth'."
  (delete-dups (copy-sequence list)))

(defun uniq-ll (list accumulator)
  "Helper for `uniq': add the new elements of LIST to ACCUMULATOR.

ACCUMULATOR holds the elements seen so far, most recent first.  Each
element of LIST that is not yet a `member' of ACCUMULATOR is pushed onto
it.  The return value is the final accumulator in reverse, i.e. in order
of first appearance.

The walk is a loop rather than a recursion, so long lists do not
exceed `max-lisp-eval-depth'."
  (dolist (el list)
    (unless (member el accumulator)
      (push el accumulator)))
  (reverse accumulator))

File Filters

Support functions to classify and select files:

(defun starts-with-underscore? (pathname)
  "Return the components of PATHNAME that start with an underscore.

PATHNAME is split on `dir-sep'.  The result is nil when no component
starts with an underscore, so the function can be used as a predicate.

Similar to Jekyll, files with underscores are ignored."
  (let (matches)
    (dolist (part (split-string pathname dir-sep))
      (when (string-match "^_" part)
        (push part matches)))
    (nreverse matches)))

(defun is-post? (pathname)
  "Return the component of PATHNAME that equals `posts-dir', or nil if none.

PATHNAME is split on `dir-sep' and every component is compared with
`posts-dir'."
  (seq-find (lambda (part) (equal posts-dir part))
            (split-string pathname dir-sep)))

(defun interesting-files (pathnames)
  "Return the elements of PATHNAMES that should be processed.

A pathname is kept when it is not a backup file name (per
`backup-file-name-p') and `starts-with-underscore?' returns nil for it."
  (seq-filter (lambda (path)
                (and (not (backup-file-name-p path))
                     (not (starts-with-underscore? path))))
              pathnames))

Files Metadata

Get the metadata of all the files in a directory. The metadata can then be used to, e.g., generate the list of posts or a list of files by category.

(defun files-metadata (directory &optional root-directory root-url)
  "Return the metadata of every Org Mode file found under DIRECTORY.

DIRECTORY is searched recursively for names matching \"\\\\.org$\".  The
matches are narrowed with `interesting-files', and `file-metadata' is
called on each remaining file with DIRECTORY, ROOT-DIRECTORY and
ROOT-URL.  The result holds one association list per file.

See the documentation of `file-metadata' for how ROOT-DIRECTORY and
ROOT-URL affect the computed paths and URLs."
  (let (collected)
    (dolist (file (interesting-files
                   (directory-files-recursively directory "\\.org$")))
      (push (file-metadata file directory root-directory root-url)
            collected))
    (nreverse collected)))


(defun files-metadata-dirlist (dirlist &optional root-directory root-url)
  "Apply `files-metadata' to each directory in DIRLIST and join the results.

ROOT-DIRECTORY and ROOT-URL are forwarded unchanged to every
`files-metadata' call.  The result is a single list of per-file
association lists, in DIRLIST order.  An empty DIRLIST yields nil."
  (let (chunks)
    (dolist (dir dirlist)
      (push (files-metadata dir root-directory root-url) chunks))
    ;; CHUNKS was built back to front; restore DIRLIST order before joining.
    (apply #'append (nreverse chunks))))

File Metadata

Get the metadata of a file. The metadata can then be used to, e.g., generate the list of posts or a list of files by category.

(defun file-metadata (filename &optional directory root-directory root-url properties-regexp)
  "Return the metadata of the Org Mode file FILENAME as an association list.

The result starts with the computed keys FILENAME, BASENAME, RELATIVE,
ABSOLUTE, IS_POST, CATEGORY_FROM_FILE, RELATIVE_URL, ABSOLUTE_URL and
DATE_ISO8601.  The keywords read from the file follow, as returned by
`org-global-props'.

Optional argument PROPERTIES-REGEXP is passed to `org-global-props' and
overrides the default set of keywords selected from the file.

Optional arguments DIRECTORY, ROOT-DIRECTORY and ROOT-URL determine
how paths and URLs are generated for the file:

- DIRECTORY determines RELATIVE and RELATIVE_URL: the path of
  FILENAME is taken relative to DIRECTORY.
- ROOT-DIRECTORY determines ABSOLUTE and ABSOLUTE_URL: the path of
  FILENAME is taken relative to ROOT-DIRECTORY, which should point to
  the root of your website sources.
- ROOT-URL, if specified, is prepended to ABSOLUTE_URL, followed by
  a slash.

DIRECTORY and ROOT-DIRECTORY both default to the directory part of
FILENAME.  In the URLs a trailing \".org\" is replaced by \".html\".

Example.  Given the structure

   A -> B -> C -> file.org

the call

   (file-metadata \"A/B/C/file.org\" \"A/B\" \"A\" \"https://example.com\")

yields:

   RELATIVE_URL  C/file.html                        (relative to A/B)
   ABSOLUTE_URL  https://example.com/B/C/file.html  (relative to A)"
  (let* ((dir      (or directory (file-name-directory filename)))
         (root-dir (or root-directory (file-name-directory filename)))

         (basename (file-name-nondirectory filename))
         (relname  (file-relative-name filename dir))
         (absname  (file-relative-name filename root-dir))

         (relurl   (replace-regexp-in-string "\\.org$" ".html" relname))
         (absurl   (concat (or root-url "") "/"
                           (replace-regexp-in-string "\\.org$" ".html" absname)))

         ;; Bound locally: assigning with `setq' here would create or
         ;; overwrite a global variable named `props'.
         (props    (with-temp-buffer
                     (insert-file-contents filename)
                     (org-property-list-to-assoc-list
                      (org-global-props properties-regexp)))))
    (append
     (list (cons "FILENAME" filename)
           (cons "BASENAME" basename)
           (cons "RELATIVE" relname)
           (cons "ABSOLUTE" absname)
           (cons "IS_POST"  (is-post? filename))
           (cons "CATEGORY_FROM_FILE" (pathname-to-category filename))
           (cons "RELATIVE_URL" relurl)
           (cons "ABSOLUTE_URL" absurl)
           (cons "DATE_ISO8601" (format-date-iso8601 (cdr (assoc "DATE" props)))))
     props)))

Since some properties in the Org Mode file might be comma separated, we provide a function to make a comma separated string into a list.

(defun prop-to-list (prop metadata)
  "Return the values of property PROP in METADATA as a list of strings.

METADATA is an association list.  The value stored under PROP is
expected to be a comma-separated string; it is split at each comma,
together with any spaces that follow the comma.  Return nil when
METADATA has no value for PROP.

    (prop-to-list \"tags\" \\='((\"tags\" . \"t1,   t2, t3\")))
      => (\"t1\" \"t2\" \"t3\")"
  (let ((value (cdr (assoc prop metadata))))
    (when value
      (split-string value ", *"))))

(defun keywords (metadata-list)
  "Return the distinct keywords used across METADATA-LIST.

METADATA-LIST is a list of association lists.  The \"KEYWORDS\" value
of each entry is split at each comma, together with any spaces that
follow the comma.  Entries without \"KEYWORDS\" contribute nothing.
Each keyword appears once in the result, in order of first appearance."
  (let (seen)
    (dolist (metadata metadata-list)
      (let ((value (cdr (assoc "KEYWORDS" metadata))))
        (when value
          (dolist (keyword (split-string value ", *"))
            (unless (member keyword seen)
              (push keyword seen))))))
    ;; SEEN was built most recent first.
    (nreverse seen)))

Functions to extract properties from a file

(defun pathname-to-category (pathname)
  "Return the post-category of the Org Mode file at PATHNAME.

The post-category is the name of the directory in which the page
lives, i.e. the next-to-last component of PATHNAME once it is split on
`dir-sep'.  Return nil when PATHNAME has a single component."
  (car (last (butlast (split-string pathname dir-sep)))))

(defun org-property-list-to-assoc-list (properties)
  "Convert PROPERTIES, a sequence of Org elements, into an association list.

Each element becomes a cons cell (KEY . VALUE), built from the :key
and :value properties of the element."
  (seq-map (lambda (element)
             (let* ((key   (org-element-property :key element))
                    (value (org-element-property :value element)))
               (cons key value)))
           properties))

(defun org-global-props (&optional property buffer)
  "Return the keyword elements of BUFFER whose key matches PROPERTY.

PROPERTY is a regexp matched against the :key of each keyword element.
It defaults to a regexp matching TITLE, AUTHOR, DATE, DESCRIPTION,
CATEGORY and KEYWORDS.  BUFFER defaults to the current buffer.  The
buffer is parsed with `org-element-parse-buffer'."
  (let ((key-regexp
         (or property
             "\\(TITLE\\|AUTHOR\\|DATE\\|DESCRIPTION\\|CATEGORY\\|KEYWORDS\\)")))
    (with-current-buffer (or buffer (current-buffer))
      (org-element-map (org-element-parse-buffer) 'keyword
        (lambda (keyword)
          ;; Returning the element keeps it; returning nil drops it.
          (and (string-match key-regexp (org-element-property :key keyword))
               keyword))))))

Working on Metadata

(defun sort-metadata (metadata key method)
  "Return a copy of METADATA sorted by the values stored under KEY.

METADATA is a list of association lists, KEY is a key of those
association lists, and METHOD is a two-argument predicate accepted by
`sort'.  METHOD is called on the KEY values of two entries.

A copy is sorted, so the list passed as METADATA keeps its order.
The comparison function is an unquoted lambda so that it can refer to
KEY and METHOD when the file is loaded with lexical binding as well."
  (sort (copy-sequence metadata)
        (lambda (x y)
          (funcall method
                   (cdr (assoc key x))
                   (cdr (assoc key y))))))

(defun group-by-year (metadata)
  "Group the entries of METADATA by the year of their DATE property.

Return an association list whose keys are years and whose values are
the entries dated in that year.  The year is the sixth field of the
list returned by `parse-time-string' for the \"DATE\" value."
  (seq-group-by
   (lambda (entry)
     (let ((decoded (parse-time-string (cdr (assoc "DATE" entry)))))
       (nth 5 decoded)))
   metadata))

(defun group-by-category (metadata)
  "Group the entries of METADATA by their CATEGORY_FROM_FILE property.

Return an association list whose keys are category names and whose
values are the entries in that category."
  (seq-group-by
   (lambda (entry)
     (alist-get "CATEGORY_FROM_FILE" entry nil nil #'equal))
   metadata))

(defun group-by (metadata key)
  "Group the entries of METADATA by the value they store under KEY.

Return an association list whose keys are the distinct KEY values and
whose values are the entries sharing that value."
  (seq-group-by
   (lambda (entry)
     (alist-get key entry nil nil #'equal))
   metadata))

(defun sort-group-by-year (group)
  "Sort GROUP, an association list keyed by year, with the largest year first.

GROUP is passed directly to `sort', which reorders it destructively;
use the return value."
  (sort group
        (lambda (left right)
          (< (car right) (car left)))))

(defun sort-group-by-category (group)
  "Sort GROUP, an association list keyed by category name, in `string<' order.

GROUP is passed directly to `sort', which reorders it destructively;
use the return value."
  (sort group
        (lambda (left right)
          (string-lessp (car left) (car right)))))

(defun sort-group (group method)
  "Sort GROUP, an association list, by applying METHOD to its keys.

METHOD is a two-argument predicate called on the `car' of two
elements of GROUP.  GROUP is passed directly to `sort', which reorders
it destructively; use the return value."
  (sort group
        (lambda (left right)
          (funcall method (car left) (car right)))))

Emitters

Simple emitters for grouped metadata.

A more powerful alternative is using templatel, which provides a Liquid-like syntax.

(defun group-to-html (group &optional excerpt)
  "Render GROUP, an association list of grouped metadata, as an HTML string.

Each element of GROUP is (KEY . ENTRIES).  KEY becomes an <h2>
heading, followed by a <ul class=\"post-list\"> holding one item per
entry, as produced by `entry-to-html'.  EXCERPT is passed on to
`entry-to-html'.  Sections are joined with a single space, as are the
items inside a section."
  (mapconcat
   (lambda (section)
     (concat
      (format "<h2>%s</h2>\n" (car section))
      "<ul class=\"post-list\">\n"
      (mapconcat (lambda (entry) (entry-to-html entry excerpt))
                 (cdr section)
                 " ")
      "</ul>\n\n"))
   group
   " "))

(defun entry-to-html (entry &optional excerpt)
  "Render ENTRY, the metadata of one file, as an HTML <li> element.

ENTRY is an association list.  The item shows the formatted \"DATE\"
and a link to \"RELATIVE_URL\" labelled with \"TITLE\".  The
\"KEYWORDS\" value is stored in the data-keywords attribute.  When
EXCERPT is non-nil, a post-excerpt span with \"DESCRIPTION\" is added.

Missing \"KEYWORDS\", \"DATE\" and \"DESCRIPTION\" values are rendered
as empty strings."
  (let ((date        (cdr (assoc "DATE" entry)))
        (keywords    (or (cdr (assoc "KEYWORDS" entry)) ""))
        (description (or (cdr (assoc "DESCRIPTION" entry)) "")))
    (format
     (concat "  <li class=\"post\" data-keywords=\"%s\">\n"
             "    <span class=\"post-date\">%s</span>\n"
             "    <span class=\"post-title\"><a href=\"%s\">%s</a></span>\n"
             (if excerpt "    <span class=\"post-excerpt\">%s</span>\n" "")
             "  </li>\n")
     ;; A double quote inside the keywords would end the attribute early.
     (replace-regexp-in-string "\"" "&quot;" keywords t t)
     ;; Entries without a date get an empty date span.
     (if date (format-date date) "")
     (cdr (assoc "RELATIVE_URL" entry))
     (cdr (assoc "TITLE" entry))
     ;; `format' ignores this argument when the excerpt line is absent.
     description)))

General Purpose Functions

;; A time of day is appended because `encode-time' fails on a date
;; that has no time.
(defun format-date (date-string)
  "Format DATE-STRING for display, e.g. \"Saturday, April 12, 2014\".

DATE-STRING is a date without a time of day, such as an Org timestamp
\"<2014-04-12 Sat>\" or a plain \"2014-04-12\".  Return the empty
string when DATE-STRING is nil."
  (if (null date-string)
      ""
    ;; The space keeps the time a separate token, so a date that does
    ;; not end in a delimiter such as \">\" is still parsed.
    (let* ((date (parse-time-string (concat date-string " 10:00")))
           (encoded (encode-time date)))
      (format-time-string "%A, %B %d, %Y" encoded))))

;; A time of day is appended because `encode-time' fails on a date
;; that has no time.
(defun format-date-iso8601 (date-string)
  "Format DATE-STRING with the template \"%Y-%m-%dT%H%M%S%z\".

DATE-STRING is a date without a time of day; the time 10:00 is
appended to it before parsing.  When DATE-STRING is nil, the current
time is formatted instead."
  (format-time-string
   "%Y-%m-%dT%H%M%S%z"
   ;; A nil time makes `format-time-string' use the current time.
   (when date-string
     (encode-time (parse-time-string (concat date-string " 10:00"))))))

What do we provide?

;; Register the `website-metadata' feature for use with `require'.
(provide 'website-metadata)