class Node2XHTML:
def __init__(self, mod, node=None, error_report=None, encode_name=None
):
self.mod = mod
self.valid_html40 = False
self.encode = self.mod.encode
if encode_name is None:
encode_name = self.mod.encode_name
self.encode_name = encode_name
if error_report is not None:
self.error_report = error_report
self.document_lang = None
self.header_nodes = []
self.indent = 0
self.indentstep = 1
self.set_out([])
# xxx where do this?
charset = 'utf-8'
self.header_nodes.append(self.mod.node_of_taci(
'meta', '', (
self.mod.node_of_taci('http-equiv=', 'Content-Type'),
self.mod.node_of_taci('content=',
'text/html; charset=%s' % charset))))
if node is not None:
node.accept(self)
def _visit_children(self, node):
node, attrs = node.split_attrs()
# xxx handle attrs?
E = self.mod.ReportedError
for ch in node.children:
try:
ch.accept(self)
except E:
pass
def begin(self, tag, arg=''):
t = '<'+tag
if arg:
t = t + ' ' + arg
if tag in self.mod._no_end_tag_elements:
# Quote from: http://gutenberg.hwg.org/markupXHTML.html
# N.B. The penultimate closing slash on empty elements such as the
# element can cause a problem in older browsers. For this reason it is
# recommended that you leave a space before the slash, namely
t += ' /'
t += '>'
if tag in self.mod.line_break_allowed:
t = '\n'+self.indent * ' ' + t
self.append(t)
self.indent += self.indentstep
def chg_out(self, out):
oo = self.out
self.set_out(out)
return oo
def encode_link_name(self, name):
# 1. Make the name better looking for a html user's perspective
# 2. Encode it by HTML rules
if name.startswith(self.mod.tgt_prefix):
name = name[len(self.mod.tgt_prefix):]
else:
# Should not happen often or at all
assert 0
name = self.encode_name(name)
return name
def end(self, tag):
self.indent -= self.indentstep
self.append('%s>' % tag)
def error(self, msg, *args, **kwds):
msg = 'Doc2XHTML: ' + msg
self.error_report(msg, *args, **kwds)
def error_report(self, msg, *args, **kwds):
print('HTML ENCODING ERROR: ', msg, 'args=', args, 'kwds=', kwds)
raise ValueError
def gen_document_header(self, lang, header_nodes):
# lang & title are nodes with text or char directives, to be encoded.
# metas is a list of nodes, with data to be encoded
strict = 1 # we have alternatives, I just havent yet decided how or if to let the user choose
if strict:
self.append("""\
""")
else:
self.append("""\
""")
self.begin('html',
'lang=%r xmlns="http://www.w3.org/1999/xhtml"' % self.get_encoded_text(
lang),
)
self.begin('head')
for node in header_nodes:
self.gen_stdhtml(node)
self.end('head')
self.begin('body')
# Get around w3c restriction that character data are not allowed
# directly in body, makes it easier to write compliant code
# Arguably the restriction is there for a reason, but I dont know...
self.begin('div')
def gen_document_trailer(self):
self.end('div')
self.end('body')
self.end('html')
def gen_empty_elmt(self, tag, arg=''):
self.begin(tag, arg)
self.indent -= self.indentstep
def gen_generated_from_gsl(self):
self.gen_empty_elmt('hr')
self.append('Generated by ')
self.begin('a', 'href="https://zhuyifei1999.github.io/guppy3/gsl.html"')
#self.begin('a', 'href="gsl.html"')
self.append('GSL-XHTML 0.1.7')
self.end('a')
self.append(' on '+self.mod.time.asctime(self.mod.time.localtime()))
def gen_meta(self, node, tag=None):
mknode = self.mod.node_of_taci
if tag is None:
tag = node.tag
self.header_nodes.append(
mknode('meta', '',
[mknode('name=', tag),
mknode('content=', node.arg, node.children)]))
def gen_stdhtml(self, node, tag=None, **options):
if tag is None:
tag = node.tag
node, attrs = node.split_attrs(tag)
self.begin(tag, ' '.join(['%s=%r' % (key, val)
for (key, val) in attrs]))
if tag in self.mod._no_end_tag_elements:
if node.arg:
self.error(
'No enclosed text allowed for Html tag: %r.' % node.tag)
self.no_children(node)
self.indent -= self.indentstep
else:
node.arg_accept(self)
self.end(tag)
def get_encoded_text(self, node):
# From a node's arg and children that are text or characters
old_out = self.chg_out([])
self.append(self.encode(node.arg))
for ch in node.children:
if ch.tag in ('text', 'char'):
ch.accept(self)
else:
self.error('Only text and char allowed here, not %r.' %
ch.tag, ch)
return ''.join(self.chg_out(old_out))
def get_html(self):
return ''.join(self.out)
def no_children(self, node):
if node.children:
self.error('No children allowed for %r. Got children nodes = %r.' % (
node.tag, node.children))
def set_out(self, out):
self.out = out
self.extend = out.extend
self.append = out.append
def visit_author(self, node):
self.gen_meta(node)
def visit_block(self, node):
self._visit_children(node)
def visit_char(self, node):
name = node.get_namearg()
if name in self.mod.name2codepoint:
name = '&%s;' % name
else:
if name[:2] == "0x":
char = int(name[2:], 16)
elif name.isdigit():
char = int(name)
else:
self.error('No such character: %r.' % name, node)
name = self.mod.codepoint2name.get(char)
if name is None:
name = '%d;' % char
else:
name = '&%s;' % name
self.append(name)
self._visit_children(node)
def visit_col_width(self, node):
# NOTE(review): the two append() string literals below are split across
# physical lines, which a single-quoted Python string cannot do, so this
# method does not parse as written.  The literal contents look truncated
# (presumably markup was lost) -- restore them from the upstream source;
# TODO confirm the intended output before changing the code.
self.append('
:
')
self.append(':
')
def visit_spc_mapsto(self, node):
self.append(' -> ')
def visit_string(self, node):
self._visit_children(node)
def visit_symbol(self, node):
self.visit_text(node)
def visit_text(self, node):
text = self.encode(node.arg)
if len(text) > 80 or '\n' in text:
self.append('\n')
self.append(text)
self._visit_children(node)
def visit_to_document_only(self, node):
self._visit_children(node)
def visit_to_html_only(self, node):
self._visit_children(node)
def visit_to_tester_only(self, node):
pass
def visit_valid_html40(self, node):
self.valid_html40 = node
node, attrs = self.valid_html40.split_attrs(attrdict=True)
# XXX check allowed attrs but in a GENERAL way
# Code taken from validator.w3.org
self.append("""\