You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
693 lines
26 KiB
Python
693 lines
26 KiB
Python
7 years ago
|
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Builds OpenPOWER Foundation documentation using standard template.
#
# Assumes rst2db has been used to convert rst to docbook.
#
|
||
|
import os, sys, getopt, shutil, errno, subprocess, copy, re
|
||
|
from os import fdopen, remove
|
||
|
from shutil import move
|
||
|
from git import Repo
|
||
|
from lxml import etree
|
||
|
from conf import opf_docbook_settings, master_doc, project
|
||
|
from subprocess import Popen, PIPE
|
||
|
|
||
|
|
||
|
def copy_xml_to_template(src_dir, tgt_dir):
    """Recursively copy the contents of src_dir into tgt_dir.

    Regular files are copied with shutil.copy. Sub-directories are created
    under tgt_dir when missing (an already existing directory is accepted)
    and then copied recursively. Entries that are neither a file nor a
    directory are skipped.
    """
    for entry in os.listdir(src_dir):
        src_path = os.path.join(src_dir, entry)

        if os.path.isfile(src_path):
            shutil.copy(src_path, tgt_dir)
            continue

        if not os.path.isdir(src_path):
            continue

        tgt_path = os.path.join(tgt_dir, entry)
        try:
            os.makedirs(tgt_path)
        except OSError as exception:
            # An existing target directory is fine; anything else is fatal.
            if exception.errno != errno.EEXIST:
                raise

        copy_xml_to_template(src_path, tgt_path)
|
||
|
|
||
|
def update_file(filename, old_str, new_str):
    """Replace every occurrence of old_str in filename with new_str.

    The file is read completely first. If old_str does not occur, an error
    is printed and the process exits with status -2 without touching the
    file. Otherwise the file is rewritten in place with the substitution
    applied.
    """
    with open(filename) as source:
        contents = source.read()

    # Refuse to rewrite anything unless the tag is really present.
    if old_str not in contents:
        print('Error: "{old_str}" not found in {filename}.'.format(old_str=old_str, filename=filename))
        sys.exit(-2)

    replaced = contents.replace(old_str, new_str)
    with open(filename, 'w') as target:
        target.write(replaced)
|
||
|
|
||
|
def traverse_clean_html_source_examples(filename):
    """Strip <span> markup from highlighted source listings in an HTML file.

    The file is processed line by line. A line starting with the highlight
    <div>/<pre> opener switches stripping on (that line is stripped too); a
    line starting with '</pre></div>' switches it off again. While stripping
    is on, '</span>' and '<span>' / '<span class="...">' tags are removed.
    The original file is kept as filename + '.bak' and the cleaned text
    takes its place. The file name is printed to stdout.
    """
    block_start = re.compile('^<div class="highlight-default"><div class="highlight"><pre>')
    block_stop = re.compile('^</pre></div>')
    opening_span = re.compile(r'\<span(\sclass="[a-z]+")?>')
    scratch_name = filename + '.tmp'

    print(filename)

    inside_listing = False
    with open(scratch_name, 'w') as cleaned:
        with open(filename) as source:
            for text in source:
                if block_start.match(text):
                    inside_listing = True
                elif block_stop.match(text):
                    inside_listing = False

                if inside_listing:
                    # Drop closing tags first, then the opening ones.
                    text = opening_span.sub('', text.replace('</span>', ''))

                cleaned.write(text)

    # Keep the untouched input as a backup, then promote the cleaned copy.
    move(filename, filename + '.bak')
    move(scratch_name, filename)
|
||
|
|
||
|
def traverse_clean_html_nodes(element):
    """Move an id-like attribute from each <ul> element onto its first child.

    For every element whose tag contains 'ul' and which has attributes, only
    the first attribute is inspected. When its name contains 'id', the
    attribute is copied to the first child and deleted from the list element,
    and a note is written to stderr. A message is printed when that first
    child itself has no children. A list with no children at all raises
    IndexError. The whole subtree is visited recursively.
    """
    if 'ul' in element.tag and element.attrib:
        attr_name = list(element.attrib.keys())[0]
        attr_value = element.attrib[attr_name]

        if 'id' in attr_name:
            first_item = element[0]
            if len(first_item) == 0:
                print('Error: Bad assumption. <ul> tag is empty.')

            # Hand the attribute over to the first child.
            first_item.attrib[attr_name] = attr_value
            del element.attrib[attr_name]

            sys.stderr.write('**Information: id attribute on <ul> tag to first sub-element, <' + element.tag + '> for ' + attr_name + ' = ' + attr_value + '\n')

    for node in list(element):
        traverse_clean_html_nodes(node)
|
||
|
|
||
|
def cleanup_html(infile, outfile):
    """Parse infile as XML, clean its nodes, and write the result to outfile.

    Comments are kept by the parser. After the tree is written, the source
    listings in outfile are cleaned in a second, text-based pass.
    """
    document = etree.parse(infile, parser=etree.XMLParser(remove_comments=False))

    # Tree-level cleanup, then persist the result.
    traverse_clean_html_nodes(document.getroot())
    document.write(outfile)

    # Must run after the write: this pass edits outfile on disk.
    traverse_clean_html_source_examples(outfile)
|
||
|
|
||
|
def find_match(reference, anchor_node, relationship):
    """Return the <anchor> whose id matches reference, or None.

    Starting at anchor_node, the run of consecutive previous siblings whose
    tag contains 'anchor' is searched. An anchor matches when the value of
    its first attribute equals reference, optionally followed by a dot and
    digits (e.g. 'intro' matches 'intro' and 'intro.2').

    reference     -- id text taken from the link's 'linkend' attribute
    anchor_node   -- candidate element, may be None
    relationship  -- label of the candidate's relation to the link; only
                     used by debug output that is currently disabled

    Returns None when anchor_node is None, is not an anchor, or no anchor in
    the run matches.
    """
    if anchor_node is None or 'anchor' not in anchor_node.tag:
        return None

    # The reference is literal id text, not a pattern: escape it so that
    # characters such as '.' or '+' only match themselves.
    pattern = re.compile('^' + re.escape(reference) + r'(\.\d+)?$')

    # NOTE(review): the source's indentation was lost; the search is assumed
    # to stop at the first previous sibling that is not an anchor -- confirm.
    node = anchor_node
    while node is not None and 'anchor' in node.tag:
        names = list(node.attrib.keys())
        # An anchor without attributes cannot match; keep looking.
        if names and pattern.match(node.attrib[names[0]]):
            return node
        node = node.getprevious()

    return None
|
||
|
|
||
|
def _last_descendant_anchor(node):
    """Follow the chain of last children below node; return the first
    element on that chain whose tag contains 'anchor', else None."""
    while len(node) > 0:
        node = node[len(node) - 1]
        if 'anchor' in node.tag:
            return node
    return None


def traverse_clean_links(element):
    """Remove dummy pilcrow links and turn their anchors into id attributes.

    Elements whose tag contains 'link' are treated as terminal. A link is a
    dummy when it has exactly one attribute, that attribute is 'linkend',
    and its text is the pilcrow sign. For a dummy link the related anchor is
    searched in this order: the previous sibling of the grandparent ("great
    uncle"), the last-child chain below it ("cousin"), then the previous
    sibling of the parent ("uncle"). The link is always deleted. When an
    anchor is found, its first attribute is copied to the link's parent (or
    to the grandparent when the parent is a title) and the anchor is
    deleted. Progress notes go to stderr.
    """
    if 'link' not in element.tag:
        for child in list(element):
            traverse_clean_links(child)
        return

    reference = element.attrib.get('linkend', None)
    # '==' rather than 'is': identity of small ints is an implementation detail.
    is_dummy = (len(element.attrib) == 1
                and reference is not None
                and element.text == u'¶')
    if not is_dummy:
        return

    parent = element.getparent()
    grandparent = parent.getparent()
    greatuncle = grandparent.getprevious()

    anchor = find_match(reference, greatuncle, 'Great Uncle')

    if anchor is None and greatuncle is not None:
        cousin = _last_descendant_anchor(greatuncle)
        if cousin is not None:
            anchor = find_match(reference, cousin, 'Cousin')

    if anchor is None:
        anchor = find_match(reference, parent.getprevious(), 'Uncle')

    # Always delete a <link> of this type (contains only the pilcrow text).
    del parent[parent.index(element)]

    if anchor is None:
        sys.stderr.write('**Information: Matching <anchor> element not found for reference = ' + reference + '. Link removed.' + '\n')
        return

    # The anchor value may carry a ".<number>" suffix that the link lacks,
    # so the id is taken from the anchor, not from the link.
    key = anchor.attrib.keys()[0]
    value = anchor.get(key)
    if 'title' in parent.tag:
        grandparent.set(key, value)
    else:
        parent.set(key, value)

    sys.stderr.write('**Information: removed dummy link and for ' + reference + ' and added proper xml:id as ' + value + '\n')

    anchor_parent = anchor.getparent()
    del anchor_parent[anchor_parent.index(anchor)]
|
||
|
|
||
|
def _promote_children_to_section(element, parent, section_tag, title_text):
    """Build a titled element from element's children, insert it right after
    parent, and remove element from parent."""
    promoted = parent.makeelement(section_tag)
    heading = parent.makeelement('title')
    heading.text = title_text
    promoted.append(heading)

    for node in element.getchildren():
        element.remove(node)
        promoted.append(node)

    parent.addnext(promoted)
    parent.remove(element)


def traverse_clean_other(element):
    """Apply the remaining structural fix-ups to element, then recurse.

    Depending on which name the element's tag contains:
      informalexample -- replaced by a new element with the grandparent's
                         tag, titled with the element's text, placed after
                         the parent; string concatenation in the log message
                         fails if the element has no text
      note            -- replaced by a new element with the parent's tag,
                         titled with the text of the first grandchild, which
                         must be a bridgehead (otherwise exit status -20)
      anchor          -- removed from its parent
      section         -- a section with a single child gets a <para>
                         holding one space appended
    Each action is reported on stderr. Finally the element's current
    children are visited recursively.
    """
    tag = element.tag

    if 'informalexample' in tag:
        parent = element.getparent()
        grandparent = parent.getparent()
        title = element.text

        _promote_children_to_section(element, parent, grandparent.tag, title)

        sys.stderr.write('**Information: <informalexample> ' + element.text + ' removed and promoted as <section> with title: ' + title + '\n')

    elif 'note' in tag:
        parent = element.getparent()
        section_tag = parent.tag

        bridgehead = element[0][0]
        if 'bridgehead' not in bridgehead.tag:
            print('Error: Bad assumption about <note> structure. Bridgehead not found as expected.')
            sys.exit(-20)

        title = bridgehead.text

        # The bridgehead becomes the title, so it must not be copied over.
        bridgehead.getparent().remove(bridgehead)

        _promote_children_to_section(element, parent, section_tag, title)

        sys.stderr.write('**Information: <note> removed and promoted as <section> with title: ' + title + '\n')

    elif 'anchor' in tag:
        parent = element.getparent()
        key = element.attrib.keys()[0]
        value = element.attrib[key]

        parent.remove(element)

        sys.stderr.write('**Information: removed <anchor> with id: ' + value + '\n')

    elif 'section' in tag:
        # Ensure at least one child beyond <title>.
        if len(element) == 1:
            title = element[0].text
            filler = element.getparent().makeelement('para')
            filler.text = ' '
            element.append(filler)

            sys.stderr.write('**Information: <para> tag added to empty section with title: ' + title + '\n')

    for node in element.getchildren():
        traverse_clean_other(node)
|
||
|
|
||
|
def cleanup_xml(infile, outfile):
    """Parse infile, repair links and other constructs, write to outfile.

    Comments are kept by the parser. Link cleanup runs first because it
    depends on the relative position of several tags that the second pass
    removes or moves.
    """
    document = etree.parse(infile, parser=etree.XMLParser(remove_comments=False))
    root = document.getroot()

    for cleanup_pass in (traverse_clean_links, traverse_clean_other):
        cleanup_pass(root)

    document.write(outfile)
|
||
|
|
||
|
def print_tree(element, level, max_depth):
    """Debug helper: print element and its descendants down to max_depth.

    One line is printed per element whose level is below max_depth, indented
    by level + 1 spaces, showing tag, attributes, text and child count.
    """
    if level >= max_depth:
        return

    child_count = len(element)
    padding = ' ' * (level + 1)
    fields = (padding, 'Tag: ', element.tag, ' Attrib: ', element.attrib,
              ' Text: >', element.text, '< Num children: ', child_count)
    # Items are joined by single spaces, as a comma-separated print would.
    print(' '.join(['%s'] * len(fields)) % fields)

    for position in range(child_count):
        print_tree(element[position], level + 1, max_depth)
|
||
|
|
||
|
def traverse_clean_sections(element):
    """Recursively delete sections titled 'Navigation' or 'Table Of Contents'.

    For each child whose tag contains 'section', the title is the text of
    its first child, or, when that first child has children of its own, the
    text of that child's first child. Sections with a blacklisted title are
    removed; all other sections are searched recursively. Children that are
    not sections are left alone and not descended into.
    """
    unwanted_titles = ['Navigation', 'Table Of Contents']

    # Iterate over a snapshot so removals do not disturb the walk.
    for node in list(element):
        if 'section' not in node.tag:
            continue

        heading_text = ''
        if len(node) > 0:
            heading = node[0]
            # A title wrapping inline markup keeps its text one level down.
            heading_text = heading.text if len(heading) == 0 else heading[0].text

        if heading_text in unwanted_titles:
            element.remove(node)
        else:
            traverse_clean_sections(node)
|
||
|
|
||
|
def eliminate_top_section(head):
    """Drop the single wrapper section under head, promoting its sections.

    Children of head whose tag contains 'info' or 'index' are removed first.
    Exactly one child must remain (otherwise exit status -13) and its tag
    must contain 'section' (otherwise exit status -36). Its section children
    are appended to head, and the wrapper itself, along with its remaining
    children, is removed.
    """
    for node in list(head):
        if 'info' in node.tag or 'index' in node.tag:
            head.remove(node)

    remaining = len(head)
    if remaining != 1:
        print('Error: Bad assumption. Too many sections (' + str(remaining) + ') found in base document.')
        sys.exit(-13)

    wrapper = head[0]
    if 'section' not in wrapper.tag:
        print('Error: Bad assumption. Top tag in document is not a section.')
        sys.exit(-36)

    # Promote nested sections to the top level.
    for node in list(wrapper):
        if 'section' in node.tag:
            wrapper.remove(node)
            head.append(node)

    head.remove(wrapper)
|
||
|
|
||
|
|
||
|
def transform_head_sections(head):
    """Rename every direct section child of head to a chapter.

    Only direct children whose tag contains 'section' are touched; the
    substring 'section' in the tag is replaced by 'chapter'. If no child
    qualifies, an error is printed and the process exits with status -6.
    """
    top_sections = [node for node in list(head) if 'section' in node.tag]

    for node in top_sections:
        node.tag = node.tag.replace('section', 'chapter')

    if not top_sections:
        print('Error: No chapters found in document')
        sys.exit(-6)
|
||
|
|
||
|
|
||
|
def convert_structure(infile, outfile):
    """Turn the <article> document in infile into a chapter-based <book>.

    The root tag must contain 'article' (otherwise exit status -4). It is
    renamed to 'book' and all its attributes are removed (exit status -5 if
    any remain). Blacklisted sections are deleted, the wrapper section is
    eliminated, the remaining top-level sections become chapters, and the
    tree is written to outfile.
    """
    xml_parser = etree.XMLParser(remove_comments=False)
    document = etree.parse(infile, parser=xml_parser)
    root = document.getroot()

    if 'article' not in root.tag:
        print('%s %s %s' % ('Toc file contains ', root.tag, 'tag, not <article>'))
        sys.exit(-4)

    root.tag = 'book'

    # Clear attributes
    for name in root.attrib.keys():
        root.attrib.pop(name, None)
    if root.attrib.items() != []:
        print('%s %s %s %s' % ('Error: Section attributes not removed. ', root.attrib.items(), ' items remain -- ', root.attrib.keys()))
        sys.exit(-5)

    # Order matters: prune first, unwrap the title section, then rename.
    traverse_clean_sections(root)
    eliminate_top_section(root)
    transform_head_sections(root)

    document.write(outfile)
|
||
|
|
||
|
|
||
|
def remove_book_tags(old_file, new_file):
    """Copy old_file to new_file, dropping every line with a book tag.

    A line is dropped when it contains '<book' or '</book>'; all other lines
    are copied unchanged. Both files are handled in binary mode so that line
    endings and non-ASCII bytes pass through untouched and the read and
    write sides agree on the data type.
    """
    with open(old_file, 'rb') as source, open(new_file, 'wb') as target:
        for line in source:
            if b'<book' not in line and b'</book>' not in line:
                target.write(line)
|
||
|
|
||
|
def insert_toc_into_book(toc_file, book_file):
    """Replace the '<!--TBD-->' placeholder line in book_file with toc_file.

    book_file is first copied to book_file + '.bak', then rewritten from
    that backup. Every line containing the placeholder is replaced, as a
    whole line, by the full contents of toc_file. If no line contains the
    placeholder, an error is printed and the process exits with status -7.
    Files are handled in binary mode so that the read and write sides agree
    and line endings are preserved.
    """
    book_file_bak = book_file + '.bak'
    shutil.copy2(book_file, book_file_bak)
    key_string = '<!--TBD-->'
    marker = key_string.encode('ascii')
    inserted_toc = False

    with open(book_file_bak, 'rb') as book_in, open(book_file, 'wb') as book_out:
        for line in book_in:
            if marker not in line:
                book_out.write(line)
                continue

            inserted_toc = True
            # The TOC file is only opened once a placeholder is found.
            with open(toc_file, 'rb') as toc_in:
                shutil.copyfileobj(toc_in, book_out)

    if not inserted_toc:
        print('%s %s %s %s' % ('Error: key string of "', key_string, '" not found in ', book_file))
        sys.exit(-7)
|
||
|
|
||
|
def build_revhistory(book_file):
    """Fill the revision-history placeholder in book_file from the git log.

    Runs 'git log' for the current directory, builds one <revision> entry
    per commit (ISO date without time, subject, abbreviated hash) and
    substitutes the resulting <revhistory> block for the literal
    '<revhistory>TBD</revhistory>' in book_file via update_file.
    """
    # Fields are separated by 0x01 and records terminated by 0x02.
    log_format = '%h%x01%an%x01%ad%x01%s%x02'
    log_fields = ['id', 'author', 'date', 'subject']

    # Retrieve log; an argument list avoids a shell and its quoting rules.
    pipe = Popen(['git', 'log', '--date=iso', '--format=' + log_format, '--', '.'],
                 stdout=PIPE)
    log, _ = pipe.communicate()
    if not isinstance(log, str):
        # Python 3 delivers bytes; Python 2 already delivers str.
        log = log.decode('utf-8', 'replace')

    # Escape XML special characters; '&' must go first so the entities
    # produced for '<' and '>' are not escaped again.
    log = log.replace('&', '&amp;').replace('<', '&lt;').replace('>', '&gt;')

    # Remove newlines and the trailing end-of-record, then split into records.
    records = log.replace('\n', '').strip('\x02').split('\x02')

    entries = []
    for record in records:
        fields = record.split('\x01')
        # An empty log yields one empty record; skip anything incomplete.
        if len(fields) < len(log_fields):
            continue
        entries.append(dict(zip(log_fields, fields)))

    # Format log into revision history
    parts = ['<revhistory>\n']
    for entry in entries:
        parts.append('<revision><date>' + entry['date'].split(' ')[0] +
                     '</date><revdescription><para>' + entry['subject'] +
                     ' (' + entry['id'] + ')</para></revdescription></revision>\n')
    parts.append('</revhistory>\n')
    revision = ''.join(parts)

    # Update file
    rev_str = '<revhistory>TBD</revhistory>'
    update_file(book_file, rev_str, revision)
|
||
|
|
||
|
|
||
|
def main(argv):
    """Convert the single-page HTML build into an OpenPOWER Docbook build.

    argv holds the command-line arguments without the program name:
      -s/--htmldir      directory containing <master_doc>.html
      -b/--builddir     directory that receives the final output
      -d/--docbookdir   working directory for the converted Docbook XML
      -m/--masterdir    directory for a fresh Docs-Master clone
      -t/--templatedir  directory for a fresh Docs-Template clone
      -h                print usage and exit

    Steps: clean the HTML, convert it with herold, clone the master and
    template repositories (existing directories are deleted first), apply
    the settings from conf.py, restructure the XML, insert it into
    bk_main.xml, build the revision history, run Maven, and copy the
    webhelp output below builddir. The function always ends in sys.exit:
    0 on success, a negative status on failure.
    """
    master_git_url = 'https://github.com/OpenPOWERFoundation/Docs-Master.git'
    template_git_url = 'https://github.com/OpenPOWERFoundation/Docs-Template.git'
    usage = 'opf_html2db.py -s <htmldir> -b <builddir> -d <docbookdir> -m <masterdir> -t <templatedir>'
    html_dir = ''
    build_dir = ''
    db_dir = ''
    master_dir = ''
    template_dir = ''
    toc_file = master_doc + '.xml'

    try:
        # Every long option takes a value, so each needs a trailing '='.
        opts, args = getopt.getopt(argv, "hs:b:d:m:t:",
                                   ["htmldir=", "builddir=", "docbookdir=", "masterdir=", "templatedir="])
    except getopt.GetoptError:
        print('Invalid option specified. Usage:')
        print(' ' + usage)
        sys.exit(-1)

    for opt, arg in opts:
        if opt == '-h':
            print(usage)
            sys.exit(0)
        elif opt in ("-s", "--htmldir"):
            html_dir = arg
        elif opt in ("-b", "--builddir"):
            build_dir = arg
        elif opt in ("-d", "--docbookdir"):
            db_dir = arg
        elif opt in ("-m", "--masterdir"):
            master_dir = arg
        elif opt in ("-t", "--templatedir"):
            template_dir = arg

    # Verify html directory, error if not found
    if not os.path.exists(html_dir):
        print('ERROR: ' + html_dir + ' does not exist. Please specify path to directory containing single html file.')
        sys.exit(-11)

    # Generate path to single file; its name is taken from master_doc.
    html_file_src = os.path.join(html_dir, master_doc + '.html')

    if not os.path.isfile(html_file_src):
        print('ERROR: ' + html_file_src + ' does not exist. Please verify path to single html file and file name.')
        sys.exit(-12)

    # Convert html file to xml and place in db directory
    if not os.path.exists(db_dir):
        print('Making docbook build directory ' + db_dir)
        # makedirs lives in os, not os.path.
        os.makedirs(db_dir)

    db_file = os.path.join(db_dir, project + '.xml')
    if os.path.exists(db_file):
        os.remove(db_file)

    # Clean up the html before handing it to herold
    print('Cleaning up html file before processing')
    html_file = os.path.join(db_dir, master_doc + '.html')
    html_file_tmp1 = html_file + '.tmp1'
    shutil.copy2(html_file_src, html_file)
    cleanup_html(html_file, html_file_tmp1)

    print('Converting html file to XML...')
    print(subprocess.check_output(['herold', '-i', html_file_tmp1, '-o', db_file]))

    # Clone a new Master Directory
    print('Cloning new Docs-Master directory...')
    if os.path.exists(master_dir):
        shutil.rmtree(master_dir)
    Repo.clone_from(master_git_url, master_dir)

    # Clone a new Template Directory
    print('Cloning new Docs-Template directory...')
    if os.path.exists(template_dir):
        shutil.rmtree(template_dir)
    Repo.clone_from(template_git_url, template_dir)

    # Create the new XML file
    rst_template_dir = os.path.join(template_dir, 'rst_template')
    full_toc_file = os.path.join(rst_template_dir, toc_file)
    shutil.copy2(db_file, full_toc_file)
    book_file = os.path.join(rst_template_dir, 'bk_main.xml')

    # Update all files in opf_docbook_settings with the tag/value pairs given
    print('Updating Docbook files with settings from conf.py...')
    for f in opf_docbook_settings.keys():
        filename = os.path.join(rst_template_dir, f)
        tags = opf_docbook_settings[f]

        for tag in tags:
            value = opf_docbook_settings[f][tag]

            # An empty value removes the placeholder element altogether.
            if value != '':
                new_str = '<' + tag + '>' + value + '</' + tag + '>'
            else:
                new_str = ''

            old_str = '<' + tag + '>TBD</' + tag + '>'
            update_file(filename, old_str, new_str)

    print('Cleaning up Docbook file structure...')
    full_toc_file_tmp1 = full_toc_file + '.tmp1'
    full_toc_file_tmp2 = full_toc_file + '.tmp2'
    full_toc_file_tmp3 = full_toc_file + '.tmp3'

    # Walk document correcting XML errors
    cleanup_xml(full_toc_file, full_toc_file_tmp1)

    # Remove extraneous sections and convert the top level tag to "book"
    convert_structure(full_toc_file_tmp1, full_toc_file_tmp2)

    # Drop the lines carrying <book> tags
    remove_book_tags(full_toc_file_tmp2, full_toc_file_tmp3)

    # Insert the converted content into the book file
    insert_toc_into_book(full_toc_file_tmp3, book_file)

    # Create revision history from Git Log
    print('Building document revision history from git log...')
    build_revhistory(book_file)

    # TODO: Remove this hack after rst_template bk_main gets updated
    update_file(book_file, 'xmlns:xlink', 'xmlns:xl')

    # Perform build of Docbook
    print('Building Docbook PDF and HTML output in Maven...')
    maven_log_file = 'build.log'
    maven_build = 'cd ' + rst_template_dir + '; mvn generate-sources 2>&1 | tee ' + maven_log_file + ''
    pipe = Popen(maven_build, shell=True)
    pipe.communicate()

    if pipe.returncode != 0:
        print("Build failed with return code:%s" % pipe.returncode)
        print("See %s/build.log for more details" % rst_template_dir)

    # Copy output to better location
    print('Copying build output...')
    bld_out_dir = os.path.join(rst_template_dir, 'target/docbkx/webhelp')
    html_head = os.path.join(bld_out_dir, opf_docbook_settings['pom.xml']['webhelpDirname'] + '/index.html')
    if os.path.exists(bld_out_dir) and os.path.exists(html_head):
        doc_dir = os.path.join(build_dir, 'docbook/opf_docbook')

        if os.path.exists(doc_dir):
            shutil.rmtree(doc_dir)
        shutil.copytree(bld_out_dir, doc_dir)
        print("Build successful. Output files located in %s" % os.path.join(doc_dir, opf_docbook_settings['pom.xml']['webhelpDirname']))

        sys.exit(0)

    else:
        print("Docbook build failed. Check logfile %s for details." % os.path.join(rst_template_dir, maven_log_file))
        sys.exit(-10)
|
||
|
|
||
|
# Script entry point: pass the arguments on without the program name.
if __name__ == '__main__':
    cli_arguments = sys.argv[1:]
    main(cli_arguments)
|