/* * Word-based sort coding decoder * * Copyright (c) 2020 Project Nayuki * All rights reserved. Contact Nayuki for licensing. * https://www.nayuki.io/page/huffman-coding-english-words */ import java.io.BufferedOutputStream; import java.io.BufferedReader; import java.io.EOFException; import java.io.FileInputStream; import java.io.FileOutputStream; import java.io.IOException; import java.io.InputStreamReader; import java.io.OutputStreamWriter; import java.io.Writer; import java.nio.charset.StandardCharsets; import java.util.HashMap; import java.util.Map; public final class SortTextDecoder { public static void main(String[] args) throws IOException { // Check arguments if (args.length != 2) { System.err.println("Usage: java SortTextDecoder Encoded.txt Decoded.txt"); System.exit(1); } // Variables to carry to next stage String text; Map codewordToWord = new HashMap(); String escapeCode = null; // Read all of input text file try (BufferedReader in = new BufferedReader(new InputStreamReader(new FileInputStream(args[0]), StandardCharsets.UTF_8))) { // Read codebook while (true) { String line = in.readLine(); if (line == null) throw new EOFException(); if (line.equals("----------")) // End of codebook break; String[] parts = line.split(" ", 2); codewordToWord.put(parts[0], parts[1]); if (parts[1].equals("ESC")) escapeCode = parts[0]; } // Read payload text StringBuilder sb = new StringBuilder(); while (true) { int c = in.read(); if (c == -1) break; sb.append((char)c); } text = sb.toString(); } // Write output text file try (Writer out = new OutputStreamWriter(new BufferedOutputStream(new FileOutputStream(args[1])), StandardCharsets.UTF_8)) { for (TextToken tok : TextTokenizer.tokenize(text)) { int type = tok.type; String val = tok.value; if (type == 4) // Symbol out.write(val); else if (escapeCode != null && 0 <= type && type <= 3 && val.startsWith(escapeCode)) // Escape out.write(val.substring(escapeCode.length())); // Delete prefix else if (0 <= type && type <= 2) { String key = val.toLowerCase(); if (!codewordToWord.containsKey(key)) throw new RuntimeException("Codeword not in codebook: " + key); String word = codewordToWord.get(key); if (type == 1) word = word.substring(0, 1).toUpperCase() + word.substring(1); // To title case if (type == 2) word = word.toUpperCase(); out.write(word); } else throw new RuntimeException("Invalid encoded text"); } } } }