The three arguments I used (followed by the two output files the program writes) were:
- toc-input.txt [a snippet into which I copied the MsoToc1 lines from mynovel.htm for input. I suppose you COULD use mynovel.htm itself, but you would have to remove the first entry because it is the text "Table of Contents". You'll see what I mean when you open mynovel.htm in Notepad++.]
- mynovel
- mynovel.htm
- toc-ncx.txt
- toc-html.txt [unused because I use Calibre later]
Import this Java code into your Eclipse workspace and run it with a Run Configuration whose program arguments are: toc-input.txt mynovel mynovel.htm
import java.io.*;
import java.util.Scanner;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Generates table-of-contents fragments for KindleGen from Microsoft Word HTML output.
 *
 * <p>Reads a file containing {@code MsoToc1} table-of-contents lines and writes two cut/paste
 * files, {@code toc-ncx.txt} (the {@code <docTitle>} and {@code <navMap>} sections for
 * {@code toc.ncx}) and {@code toc-html.txt} (a list of links for {@code toc.htm}).
 *
 * <p>Beware of special characters such as "smart quotes": headings and the title are copied
 * through unchanged, so they need to be corrected manually.
 */
public final class NcxGenTOC {

  /** Line separator appended to everything that is written. */
  private static final String NL = System.getProperty("line.separator");

  /** Encoding the input file is read with. */
  private static final String INPUT_ENCODING = "UTF-8";

  /**
   * Recognises a TOC line: an opening {@code <p>} or {@code <div>} at the very start of the line
   * whose first attribute is {@code class=MsoToc1}, with or without quotes.
   * NOTE(review): the unquoted {@code <p class=MsoToc1>} form is what Word is believed to export;
   * confirm against your own .htm file and tweak as needed.
   */
  private static final Pattern TOC_ENTRY =
      Pattern.compile("<(?:p|div)\\s+class=[\"']?MsoToc1[\"']?(?=[\\s>])");

  /** Captures the text between a tag that ends in {@code ">} and the next closing tag. */
  private static final Pattern TOC_HEADING = Pattern.compile("\">([^<>]*)</");

  /** Captures the value of a double-quoted {@code href} attribute. */
  private static final Pattern TOC_REFERENCE =
      Pattern.compile("href=\"([^\"]*)\"", Pattern.CASE_INSENSITIVE);

  /**
   * Command-line entry point.
   *
   * @param aArgs three arguments: the file holding the cut/pasted MsoToc1 entries, the document
   *     title, and the document's HTML file name
   * @throws IllegalArgumentException if fewer than three arguments are supplied
   * @throws IOException if the input cannot be read or an output file cannot be written
   */
  public static void main(String... aArgs) throws IOException {
    if (aArgs == null || aArgs.length < 3) {
      throw new IllegalArgumentException(
          "Usage: NcxGenTOC <toc-input-file> <document-title> <document-html-filename>");
    }
    String inFileName = aArgs[0];
    String docTitle = aArgs[1];
    String htmlFileName = aArgs[2];
    NcxGenTOC generator = new NcxGenTOC(inFileName, docTitle, htmlFileName);
    generator.convert();
  }

  /**
   * Constructor.
   *
   * @param aFileName file containing the MsoToc1 lines to read
   * @param aDocTitle title written into the {@code <docTitle>} element
   * @param aHtmlFileName file name prefixed to every link target that is written
   */
  NcxGenTOC(String aFileName, String aDocTitle, String aHtmlFileName) {
    fFileName = aFileName;
    fDocTitle = aDocTitle;
    fHtmlFileName = aHtmlFileName;
  }

  /**
   * Generates the cut/paste files.
   *
   * <p>Every input line recognised as a TOC entry contributes one {@code <navPoint>} to
   * {@link #outfile} and one {@code <a>} link to {@link #tocHtml}. Entries are numbered from 1 in
   * input order. A TOC line that has no heading text or no {@code href} is logged and skipped
   * rather than written as a malformed entry.
   *
   * @throws IOException if the input cannot be opened or read, or an output cannot be written
   */
  void convert() throws IOException {
    log("Reading from file: " + fFileName);
    log("creating output file.");
    // The input is opened first so that a missing input file fails before the output files
    // are created or truncated; try-with-resources closes all three on every exit path.
    try (Scanner scanner = new Scanner(new FileInputStream(fFileName), INPUT_ENCODING);
        Writer out = new OutputStreamWriter(new FileOutputStream(outfile), encoding);
        Writer htmlOut = new OutputStreamWriter(new FileOutputStream(tocHtml), encoding)) {

      out.write("<docTitle>" + NL);
      out.write("<text>" + fDocTitle + "</text>" + NL);
      out.write("</docTitle>" + NL);
      out.write("<navMap>" + NL);

      int playOrder = 0;
      boolean firstLine = true;
      while (scanner.hasNextLine()) {
        String text = scanner.nextLine();
        if (firstLine) {
          firstLine = false;
          // Editors may save UTF-8 with a byte-order mark, which would hide the first entry.
          if (text.startsWith("\uFEFF")) {
            text = text.substring(1);
          }
        }
        if (!isTocEntry(text)) {
          continue;
        }
        String heading = firstHeading(text);
        String reference = firstReference(text);
        if (heading == null || reference == null) {
          log("Skipping TOC line without a heading or href: " + text);
          continue;
        }
        playOrder++;
        log(playOrder + ": " + text);
        log(heading);
        log(reference);

        String target = fHtmlFileName + reference;
        out.write(navPoint(playOrder, heading, target));
        htmlOut.write("<a href=\"" + target + "\">" + heading + "</a><br />" + NL);
      }

      // Scanner hides read failures; surface them instead of writing a silently short TOC.
      IOException readFailure = scanner.ioException();
      if (readFailure != null) {
        throw readFailure;
      }
      out.write("</navMap>" + NL);
    }
    log("Files written: " + outfile + " and " + tocHtml);
  }

  /** Returns true when the line starts with an MsoToc1 paragraph/div opening tag. */
  private static boolean isTocEntry(String text) {
    return TOC_ENTRY.matcher(text).lookingAt();
  }

  /** Returns the first non-empty heading text on the line, or null when there is none. */
  private static String firstHeading(String text) {
    Matcher matcher = TOC_HEADING.matcher(text);
    while (matcher.find()) {
      String candidate = matcher.group(1);
      if (candidate.length() > 0) {
        return candidate;
      }
    }
    return null;
  }

  /** Returns the value of the first href attribute on the line, or null when there is none. */
  private static String firstReference(String text) {
    Matcher matcher = TOC_REFERENCE.matcher(text);
    return matcher.find() ? matcher.group(1) : null;
  }

  /** Builds one complete navPoint element; each of its three parts is followed by a blank line. */
  private static String navPoint(int playOrder, String heading, String contentSrc) {
    StringBuilder entry = new StringBuilder();
    entry.append("<navPoint id=\"navPoint-").append(playOrder)
        .append("\" playOrder=\"").append(playOrder).append("\">").append(NL).append(NL);
    entry.append("<navLabel>").append(NL)
        .append("<text>").append(heading).append("</text>").append(NL)
        .append("</navLabel>").append(NL).append(NL);
    entry.append("<content src=\"").append(contentSrc).append("\"/>").append(NL)
        .append("</navPoint>").append(NL).append(NL);
    return entry.toString();
  }

  // PRIVATE
  private final String fFileName;
  private final String fDocTitle;
  private final String fHtmlFileName;

  /** Encoding used for both output files. */
  String encoding = "UTF-8";
  /** Name of the NCX fragment file that convert() writes. */
  String outfile = "toc-ncx.txt";
  /** Name of the HTML link-list file that convert() writes. */
  String tocHtml = "toc-html.txt";

  /** Prints a progress message to standard output. */
  private void log(String aMessage) {
    System.out.println(aMessage);
  }
}
No comments:
Post a Comment