;; Given a list of SExps, SEXP-LIST, return this translated into a string ;; ;; Relies on SRFI-6, Basic String Ports. ;; ;; This is a simple, easily embeddable routine for converting S-expressions ;; to XML. It's inspired by SXML (see http://okmij.org/ftp/Scheme/xml.html), ;; but because it's only for writing XML, rather than reading, transforming, ;; and much else beside, it's much smaller. ;; ;; Usage: ;; ;; (sexp->xml ) => string ;; (sexp->xml ? ? output-html?) ;; => string ;; (sexp->xml port ? ? output-html?) ;; => undef, but XML is written to the given port ;; (sexp->html port? ) ;; -- same as (sexp->xml port? #f #f #t) ;; ;; Takes three optional arguments: the first specifies a list of ;; elements which are to be formatted (ie, have linebreaks inserted) ;; as `block' elements (like
in HTML), and the second a list ;; which should be formatted as `para' elements (like HTML

). ;; Either may be given as 'ALL to format all like this. The default ;; for each is #f, implying no linebreaking. ;; ;; The third optional argument is a switch -- if true, the output is ;; slightly adjusted, so that the result is valid HTML (empty elements ;; are output as '
' rather than '
', and PIs are turned into comments. ;; In this mode, the block- and para-element-list arguments are ;; defaulted to versions appropriate for HTML. As discussed at ;; there are some gotchas associated with ;; distributing XHTML as text/html. ;; ;; Examples: ;; ;; (sexp->xml '(top (el1 "hello") (el2 (@ (att val))))) ;; => "hello" ;; ;; (sexp->xml '(top (el1 "hello") (el2 (@ (att val)))) '(el1)) ;; => "\nhello\n\n" ;; ;; (sexp->xml '(html (head (title "SEXPs") (meta)) ;; (body (p (@ (class simple)) hello)))) ;; => "SEXPs ..." ;; (sexp->html '(html (head (title "SEXPs") (meta)) ;; (body (p (@ (class simple)) hello)))) ;; => "\n\nSEXPs\n\n ..." ;; ;; Partly for compatibility with SXML, you can wrap the inside a *TOP* sexp: ;; (sexp->xml '(*TOP* (p "hello"))) ;; ;; We can also handle PIs, CTYPE marked sections, and comments: ;; (sexp->xml '(*PI* "content")) => "" ;; (sexp->xml '(*CDATA* "hello<&" "more")) => "" ;; (sexp->xml '(*COMMENT* "comment" "stuff")) => "" ;; ;; You can generate the initial XML programming instruction with ;; *XMLPI*, and a doctype declaration with *DOCTYPE*. *XMLPI* takes ;; an optional argument which is either a symbol abbreviation for a ;; character encoding (one of utf8, utf16, ucs2, ucs4, 8859-n (n=1-16, ;; excluding 12) 2022-jp, shift-jis, or euc-jp) or a literal string. ;; The *DOCTYPE* form may take either one or three arguments: it may ;; have either a single symbol argument (one of html4, xhtml, ;; xhtml-10-strit, xhtml-10-transitional, xhtml-10-frameset or rdfa) or three string ;; arguments indicating the 'Name', 'PubidLiteral' and 'SystemLiteral' ;; of the 'doctypedecl' production of ;; . If either of these ;; forms is included, it is necessary to wrap them and the content ;; form inside a '(*TOP* ...)' form. ;; ;; Any content other than an element type which is #f, we simply ignore ;; (sexp->xml '(p "Hello" #f "!")) => "Hello!" ;; ;; In addition, we define the utility function ESCAPE-STRING-FOR-XML: ;; (escape-string-for-xml "hello "hello<there" ;; ;; ;; Copyright 2006-2010 Norman Gray, ;; Released under the terms of the GNU General Public Licence. ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; ;; Configuration. ;; Uncomment as appropriate for your Scheme implementation ;; scsh: ;; Give '-o srfi-6' on command line, or define a module with ;; (define-structure sexp->xml (export sexp->xml) (open srfi-6) ... ) ;@CONFIG scsh@(define pformat format) ;@CONFIG scsh@(define open-output-string make-string-output-port) ;@CONFIG scsh@(define get-output-string string-output-port-output) ;; sisc: ;@CONFIG sisc@(define pformat format) ;@CONFIG sisc@(require-library 'sisc/libs/srfi/srfi-6) ;@CONFIG sisc@(import srfi-6) ;; mzscheme: ;@CONFIG mzscheme@(define (pformat port fmt . args) ;@CONFIG mzscheme@ (cond ((port? port) ;@CONFIG mzscheme@ (display (apply format (cons fmt args)) port)) ;@CONFIG mzscheme@ (port ; true value, send to current output port ;@CONFIG mzscheme@ (display (apply format (cons fmt args)))) ;@CONFIG mzscheme@ (else ; port => #f, return string ;@CONFIG mzscheme@ (apply format (cons fmt args))))) ;@CONFIG mzscheme@(require (lib "6.ss" "srfi")) ;; You might want to enable the self-tests at the bottom. (define-syntax when (syntax-rules () ((_ test form . forms) (if test (let () form . forms) #f)))) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; ;; Implementation (define (sexp->xml arg . args) (if (output-port? arg) (if (null? args) (error "sexp->xml: no sexp argument found") (apply sexp->xml* (cons arg args))) (let ((string-port (open-output-string))) (apply sexp->xml* `(,string-port ,arg ,@args)) (get-output-string string-port)))) (define (sexp->html . args) (cond ((and (= (length args) 1) (list? (car args))) (let ((string-port (open-output-string))) (sexp->xml* string-port (car args) #f #f #t) (get-output-string string-port))) ((and (= (length args) 2) (output-port? (car args))) (sexp->xml* (car args) (cadr args) #f #f #t)) (else (error "Wrong number of arguments to sexp->html")))) (define (sexp->xml* port s . opts) (let ((block-elems (and (> (length opts) 0) (car opts))) (para-elems (and (> (length opts) 1) (cadr opts))) (output-html? (and (> (length opts) 2) ;defaults false (caddr opts)))) (cond ((not s) #f) ((string? s) (display s port)) ((symbol? s) (display s port)) ((number? s) (pformat port "~a" s)) ((and (list? s) (or (null? s) (not (symbol? (car s))))) (error (pformat #f "malformed element name ~s [should be a symbol]" (car s)))) ((list? s) (if (and (> (length s) 1) (list? (cadr s)) (eq? (caadr s) '@)) (sexp->xml-write* port (car s) (cdadr s) (cddr s) block-elems para-elems output-html?) (sexp->xml-write* port (car s) #f (cdr s) block-elems para-elems output-html?))) (else (error (pformat #f "Unrecognised type of object (~s) in sexp->xml" s)))))) ;; Handle the *XMLPI* form ;; (These are the encodings and corresponding abbreviations which are ;; listed as 'should's in ) (define (handle-xmlpi port content) (let ((encoding-map ; see http://www.w3.org/TR/REC-xml/#charencoding '((utf8 . "UTF-8") (utf16 . "UTF-16") (ucs2 . "ISO-10646-UCS-4") (ucs4 . "ISO-10646-UCS-2") ;; include all the ISO-8859 sections (8859-12 does not exist) (8859-1 . "ISO-8859-1") (8859-2 . "ISO-8859-2") (8859-3 . "ISO-8859-3") (8859-4 . "ISO-8859-4") (8859-5 . "ISO-8859-5") (8859-6 . "ISO-8859-6") (8859-7 . "ISO-8859-7") (8859-8 . "ISO-8859-8") (8859-9 . "ISO-8859-9") (8859-10 . "ISO-8859-10") (8859-11 . "ISO-8859-11") (8859-13 . "ISO-8859-13") (8859-14 . "ISO-8859-14") (8859-15 . "ISO-8859-15") (8859-16 . "ISO-8859-16") (2022-jp . "ISO-2022-JP") (shift-jis . "Shift_JIS") (euc-jp . "EUC-JP")))) (cond ((null? content) (pformat port "~%")) ((string? (car content)) (pformat port "~%" (car content))) ((assv (car content) encoding-map) => (lambda (p) (pformat port "~%" (cdr p)))) (else (error (pformat #f "Unrecognised encoding: ~s" (car content))))))) ;; Handle the *DOCTYPE* form ;; (should we add more doctype symbols here?) (define (handle-doctype port content) (let ((doctype-map ;; See http://www.w3.org/QA/2002/04/valid-dtd-list.html '((html4 ; http://www.w3.org/TR/html401/ "html" "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd") (xhtml-10 "html" "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd") (xhtml-10-transitional "html" "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd") (xhtml-10-frameset "html" "-//W3C//DTD XHTML 1.0 Frameset//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-frameset.dtd") (xhtml "html" "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd") (rdfa ; http://www.w3.org/TR/rdfa-syntax/#a_DTD_driver "html" "-//W3C//DTD XHTML+RDFa 1.0//EN" "http://www.w3.org/MarkUp/DTD/xhtml-rdfa-1.dtd")))) (case (length content) ((1) (cond ((and (symbol? (car content)) (assv (car content) doctype-map)) => (lambda (l) (pformat port "~%" (list-ref l 1) (list-ref l 2) (list-ref l 3)))) (else (error (pformat #f "Unrecognised content type: ~s" (car content)))))) ((3) (pformat port "~%" (list-ref content 0) (list-ref content 1) (list-ref content 2))) (else (error (pformat #f "Malformed argument to *DOCTYPE*: ~s" content)))))) ;; Write out an element with attributes, and formatting depending on the ;; element `type'. ;; GI: a symbol containing the name of the element to be written ;; ATTLIST: a list of two-element lists, each containing (attribute value), ;; as either symbols or strings ;; CONTENT: a sexp representing the element content ;; BLOCK-ELEMENT-LIST and PARA-ELEMENT-LIST: either a list of symbols ;; or the symbol 'ALL. If the GI is found in one of the lists, or the ;; relevant variable has the value 'ALL, then the element is formatted ;; as a block element, a paragraph element, or an inline element if it ;; is in neither list. ;; Internal function (define (sexp->xml-write* port gi attlist content block-element-list para-element-list output-html?) (let ((block-elements (or block-element-list (and output-html? '(html head body div ul ol)) '())) (para-style (or para-element-list (and output-html? '(p title link h1 h2 h3 h4 h5 h6 li)) '())) (empty-tag-end (if output-html? ">" "/>"))) (cond ((eqv? gi '*TOP*) (for-each (lambda (x) (sexp->xml* port x block-element-list para-element-list output-html?)) content)) ((eqv? gi '*PI*) (pformat port (if output-html? "" "") (apply string-append content))) ((eqv? gi '*XMLPI*) (handle-xmlpi port content)) ((eqv? gi '*DOCTYPE*) (handle-doctype port content)) ((eq? gi '*CDATA*) (pformat port "" (apply string-append content))) ((eq? gi '*COMMENT*) (pformat port "" (apply string-append content))) ((null? content) ;; empty element (pformat port "<~a" gi) (when attlist (for-each (lambda (p) (pformat port " ~a='~a'" (car p) (cadr p))) attlist)) (if (or (eq? block-elements 'ALL) (memq gi block-elements) (eq? para-style 'ALL) (memq gi para-style)) (begin (display empty-tag-end port) (newline port)) (display empty-tag-end port))) (else ; has list content (pformat port "<~a" gi) (when attlist (for-each (lambda (p) (pformat port " ~a='~a'" (car p) (cadr p))) attlist)) (if (or (eq? block-elements 'ALL) (memq gi block-elements)) (begin (display ">" port) (newline port)) (display ">" port)) (for-each (lambda (x) (sexp->xml* port x block-element-list para-element-list output-html?)) content) (pformat port (cond ((or (eq? block-elements 'ALL) (memq gi block-elements)) "~%~%") ((or (eq? para-style 'ALL) (memq gi para-style)) "~%") (else "")) gi))))) ;; ESCAPE-STRING-FOR-XML string -> string ;; ESCAPE-STRING-FOR-XML string port -> ;; ;; Given a string, return the string with < and & characters escaped. ;; ;; If the optional port argument is present, write to the given port instead. ;; PORT may be a port, or #t, in which case output is to the ;; (current-output-port), or #f, in which case the result is returned ;; as a string. (define (escape-string-for-xml s . opt-port) (define (write-while-escaping l port) (and (not (null? l)) (let ((c (car l))) (cond ((char=? c #\<) (display "<" port)) ((char=? c #\&) (display "&" port)) ((char=? c #\>) (display ">" port)) ;for symmetry (else (display c port))) (write-while-escaping (cdr l) port)))) (cond ((or (null? opt-port) ; return a string (not (car opt-port))) ; argument is #f (let ((sp (open-output-string))) (write-while-escaping (string->list s) sp) (get-output-string sp))) ((output-port? (car opt-port)) ; send it to the given port (write-while-escaping (string->list s) (car opt-port))) (else ; not port, but true (should be just #t), so send to curr. output (write-while-escaping (string->list s) (current-output-port))))) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; ;; Tests ;; To enable tests, uncomment the lines starting ;@test@ and load this file. ;; In the scsh case, we need to include '-o srfi-6 -o handle' on the command line ;@test@(define (call-with-string-port thunk) ;@test@ (let ((sp (open-output-string))) ;@test@ (thunk sp) ;@test@ (get-output-string sp))) ;@test@(define ntests 0) ;@CONFIG mzscheme@;@test@(define (printf fmt . args) ;@CONFIG mzscheme@;@test@ (display (apply format (cons fmt args)))) ;@CONFIG sisc@;@test@(define (printf fmt . args) (apply format `(#t ,fmt . ,args))) ;@CONFIG scsh@;@test@(define (printf fmt . args) (apply format `(#t ,fmt . ,args))) ;@test@ ;@test@(define-syntax expect ;@test@ (syntax-rules () ;@test@ ((_ id expected body ...) ;@test@ (let ((test ((lambda () body ...)))) ;@test@ (if (equal? expected test) ;@test@ (set! ntests (+ ntests 1)) ;@test@ (begin (printf "Test ~a~% produced ~s~% expected ~s~%" ;@test@ (quote id) test expected) ;@test@ (exit 1))))))) ;@CONFIG mzscheme@;@test@(define-syntax error-is-success ;@CONFIG mzscheme@;@test@ (syntax-rules () ;@CONFIG mzscheme@;@test@ ((_ body) ;@CONFIG mzscheme@;@test@ (with-handlers ((exn:fail? (lambda (e) #f))) ;@CONFIG mzscheme@;@test@ (set! ntests (+ ntests 1)) ;@CONFIG mzscheme@;@test@ body)))) ;@CONFIG scsh@;@test@(define-syntax error-is-success ;@CONFIG scsh@;@test@ (syntax-rules () ;@CONFIG scsh@;@test@ ((_ body) ;@CONFIG scsh@;@test@ (call-with-current-continuation ;@CONFIG scsh@;@test@ (lambda (k) ;@CONFIG scsh@;@test@ ;; requires '-o handle' on scsh commandline ;@CONFIG scsh@;@test@ (with-handler (lambda (condition next) (k #f)) ;@CONFIG scsh@;@test@ (lambda () ;@CONFIG scsh@;@test@ (set! ntests (+ ntests 1)) ;@CONFIG scsh@;@test@ body))))))) ;@test@(define-syntax expect-failure ;@test@ (syntax-rules () ;@test@ ((_ body) ;@test@ (expect-failure #f body)) ;@test@ ((_ label body) ;@test@ (error-is-success ;@test@ (let ((test ((lambda () body)))) ;@test@ ;; shouldn't get here ;@test@ (printf "Test ~a~% produced ~s~% expected exception!~%" ;@test@ (quote label) test) ;@test@ (exit 1)))))) ;@test@ ;@test@ ;@test@(expect sexp->xml-simple ;@test@ "

hello there

" ;@test@ (sexp->xml '(p "hello " (em there) (br)))) ;@test@(expect sexp->xml-html ;@test@ "

hello there

\n" ;@test@ (sexp->xml '(p "hello " (em there) (br)) ;@test@ #f #f #t)) ;@test@(expect sexp->xml-html-formatting ;@test@ "

hello there\n
\n

\n" ;@test@ (sexp->xml '(p "hello " (em there) (br)) ;@test@ #f 'ALL #t)) ;@test@(expect sexp->html-simple ;@test@ "

hello there

\n" ;@test@ (sexp->html '(p "hello " (em there) (br)))) ;@test@ ;@test@(expect sexp->xml-port ;@test@ "

hello there

" ;@test@ (call-with-string-port ;@test@ (lambda (sp) ;@test@ (sexp->xml sp '(p "hello " (em there)))))) ;@test@ ;@test@(expect sexp->xml-body ;@test@ "

\nhello\nthere\n\n\nfriend\n\n

\n\n" ;@test@ (call-with-string-port ;@test@ (lambda (sp) ;@test@ (sexp->xml sp ;@test@ '(p "hello" (em there) (em friend)) ;@test@ '(p em))))) ;@test@ ;@test@(expect sexp->xml-body-para ;@test@ "\nhello\n\n\n" ;@test@ (sexp->xml '(top (el1 "hello") (el2 (@ (att val)))) ;@test@ '(top) ;@test@ '(el1))) ;@test@ ;@test@(expect-failure malformed-sexp ;@test@ (sexp->xml '(top ((el1 "content")) (el2 "content2")))) ;@test@ ;@test@ ;@test@(expect sexp->xml-pi ;@test@ "

hello

" ;@test@ (sexp->xml '(p "hello" (*PI* "xml version='1.0'")))) ;@test@(expect sexp->html-pi ;@test@ "

hello

\n" ;@test@ (sexp->html '(p "hello" (*PI* "xml version='1.0'")))) ;@test@(expect sexp->xml-cdata ;@test@ "

hello

" ;@test@ (sexp->xml '(p "hello " (*CDATA* "ping<&" "and stuff")))) ;@test@(expect sexp->xml-comment ;@test@ "

Hello

" ;@test@ (sexp->xml '(p "Hello " (*COMMENT* "My comm" "ent ")))) ;@test@ ;@test@(expect sexp->xml-doctype ;@test@ "\n\n

hello

" ;@test@ (sexp->xml '(*TOP* (*XMLPI*) ;@test@ (*DOCTYPE* xhtml) ;@test@ (p "hello")))) ;@test@(expect sexp->xml-doctype-normal ;@test@ "\n\n

hello

" ;@test@ (sexp->xml '(*TOP* (*XMLPI* utf8) ;@test@ (*DOCTYPE* rdfa) ;@test@ (p "hello")))) ;@test@(expect sexp->xml-doctype-options ;@test@ "\n\n

hello

" ;@test@ (sexp->xml '(*TOP* (*XMLPI* "ISO-8859-WIBBLE") ;@test@ (*DOCTYPE* html ;@test@ "-//W3C//DTD XHTML+RDFa 1.0//EN" ;@test@ "http://www.w3.org/MarkUp/DTD/xhtml-rdfa-1.dtd") ;@test@ (p "hello")))) ;@test@ ;@test@(expect sexp-xml-false ;@test@ "

Hello !

" ;@test@ (sexp->xml '(p "Hello " #f "!"))) ;@test@ ;@test@ ;@test@(expect escape-string-for-xml ;@test@ "hello<there&again>" ;@test@ (escape-string-for-xml "hello")) ;@test@ ;@test@(expect escape-string-for-xml-to-false ;@test@ "hello<there&again>" ;@test@ (escape-string-for-xml "hello" #f)) ;@test@ ;@test@(expect escape-string-for-xml-to-port ;@test@ "hello<there&again>" ;@test@ (call-with-string-port ;@test@ (lambda (sp) ;@test@ (escape-string-for-xml "hello" sp)))) ;@test@ ;@test@(printf "sexp-xml: passed ~a tests~%" ntests) ;@test@(exit 0) ;; History: ;; changeset: 77:e9c2ec708d93 ;; tag: tip ;; user: Norman Gray ;; date: Mon Jan 18 16:59:50 2010 +0000 ;; summary: Fix DTDs ;; ;; changeset: 71:649742dade75 ;; user: Norman Gray ;; date: Mon Jan 18 15:48:27 2010 +0000 ;; summary: Add XMLPI and DOCTYPE forms ;; ;; changeset: 69:3fd97c36e9d9 ;; user: Norman Gray ;; date: Tue May 12 11:40:58 2009 +0100 ;; summary: define (when) syntax for the sake of schemes which don't support it yet ;; ;; changeset: 68:bbeae2ba8bf9 ;; user: Norman Gray ;; date: Sun Jan 11 22:27:15 2009 +0000 ;; summary: Remove #lang module stuff ;; ;; changeset: 67:d1b310eb06f2 ;; user: Norman Gray ;; date: Sun Jan 11 21:48:28 2009 +0000 ;; summary: Fix sexp-xml for mzscheme 4 ;; ;; changeset: 52:9abdff874d5e ;; user: norman ;; date: Sun Oct 12 16:57:51 2008 +0100 ;; summary: Handle content of #f by ignoring it ;; ;; changeset: 47:2cf05658fdfe ;; user: norman ;; date: Fri Feb 29 22:25:55 2008 +0000 ;; summary: Improve *COMMENT*: there are now no extra spaces added ;; ;; changeset: 45:e9e2594ae59b ;; user: norman ;; date: Thu Aug 09 10:51:22 2007 +0100 ;; summary: Added sexp->html ;; ;; changeset: 44:e9a4f4eeec82 ;; user: norman ;; date: Wed Aug 01 17:52:48 2007 +0100 ;; summary: Adjustments to self-test support (make it more compatible with my test ;; ;; changeset: 43:e29a7a57fb06 ;; user: norman ;; date: Wed Aug 01 14:24:36 2007 +0100 ;; summary: Added a mzscheme directory. ;; ;; changeset: 40:6b44171188d7 ;; user: norman ;; date: Tue Oct 10 16:04:22 2006 +0100 ;; summary: Add support for (*COMMENT* ...) ;; ;; changeset: 37:1fb4dd532f9f ;; user: norman ;; date: Fri Sep 01 18:32:10 2006 +0100 ;; summary: Add tweaks to make post-distribution configuration a little easier ;; ;; changeset: 27:b427e3ea9210 ;; user: norman ;; date: Sat Aug 12 22:47:46 2006 +0100 ;; summary: SEXP->XML now optionally writes to a port. ;; ;; changeset: 18:37a95cdddf0e ;; user: norman ;; date: Mon Jul 24 12:43:25 2006 +0100 ;; summary: Tidy up documentation in header ;; ;; changeset: 13:5762b5a55f78 ;; user: norman ;; date: Wed May 17 21:26:16 2006 +0100 ;; summary: Switch where block-elems and para-elems are defaulted ;; ;; changeset: 2:eb2429f82b33 ;; user: norman ;; date: Tue Dec 13 12:21:43 2005 +0000 ;; summary: Remove scsh-specific structure/module code (now in scsh/sexp-xml.scm) ;; ;; changeset: 0:c612d0ee0e0f ;; user: norman ;; date: Sun Nov 06 00:52:37 2005 +0000 ;; summary: Initial version ;;