/* 
 * Word-based Huffman coding decoder
 * 
 * Copyright (c) 2020 Project Nayuki
 * All rights reserved. Contact Nayuki for licensing.
 * https://www.nayuki.io/page/huffman-coding-english-words
 */

import java.io.BufferedOutputStream;
import java.io.BufferedReader;
import java.io.EOFException;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.io.Writer;
import java.nio.charset.StandardCharsets;
import java.util.HashMap;
import java.util.Map;


/**
 * Command-line decoder for a word-based Huffman-coded text file.
 *
 * <p>The input file (read as UTF-8) has two sections:</p>
 * <ol>
 *   <li>A codebook: one entry per line in the form {@code codeword word}, where the
 *       first space on the line separates the codeword from the word. The codebook
 *       ends at a line consisting of exactly {@code ----------}.</li>
 *   <li>The encoded text: runs of ASCII letters ({@code A-Z}, {@code a-z}) are
 *       codewords that get replaced by their codebook word; every other character
 *       is copied to the output unchanged.</li>
 * </ol>
 */
public final class HuffmanTextDecoder {

    /** Line that terminates the codebook section of the input file. */
    private static final String CODEBOOK_END = "----------";


    /**
     * Decodes the file named by {@code args[0]} and writes the result (as UTF-8)
     * to the file named by {@code args[1]}.
     *
     * @param args exactly two elements: the encoded input path and the decoded output path;
     *             any other count prints a usage message and exits with status 1
     * @throws EOFException if the input ends before the codebook terminator line
     * @throws IOException if reading or writing a file fails
     * @throws RuntimeException if a codebook line is malformed, or the encoded text
     *         contains an unfinished or undecodable codeword
     */
    public static void main(String[] args) throws IOException {
        // Check arguments
        if (args.length != 2) {
            System.err.println("Usage: java HuffmanTextDecoder Encoded.txt Decoded.txt");
            System.exit(1);
            return;
        }

        // Start reading input text file
        try (BufferedReader in = new BufferedReader(
                new InputStreamReader(new FileInputStream(args[0]), StandardCharsets.UTF_8))) {

            // The codebook is parsed before the output file is opened, so a bad
            // codebook does not create or truncate the output file.
            Map<String, String> codewordToWord = readCodebook(in);

            // Write output text file
            try (Writer out = new OutputStreamWriter(
                    new BufferedOutputStream(new FileOutputStream(args[1])), StandardCharsets.UTF_8)) {
                decodeText(in, out, codewordToWord);
            }
        }
    }


    /**
     * Reads codebook lines from {@code in} up to and including the terminator line.
     *
     * @param in reader positioned at the first codebook line
     * @return mapping from codeword to the word it stands for
     * @throws EOFException if the input ends before the terminator line is seen
     * @throws RuntimeException if a line does not contain a space separating codeword and word
     */
    private static Map<String, String> readCodebook(BufferedReader in) throws IOException {
        Map<String, String> codewordToWord = new HashMap<>();
        while (true) {
            String line = in.readLine();
            if (line == null)
                throw new EOFException();
            if (line.equals(CODEBOOK_END))  // End of codebook
                return codewordToWord;

            // Limit of 2: only the first space separates codeword from word
            String[] parts = line.split(" ", 2);
            if (parts.length != 2)
                throw new RuntimeException("Invalid codebook line: " + line);
            codewordToWord.put(parts[0], parts[1]);
        }
    }


    /**
     * Reads the encoded text from {@code in} one character at a time and writes the
     * decoded text to {@code out}.
     *
     * @param in reader positioned just after the codebook terminator line
     * @param out destination for the decoded text
     * @param codewordToWord codebook mapping each codeword to its word
     * @throws RuntimeException if a non-letter or end of input is reached while a
     *         codeword is only partially accumulated, or if the accumulated letters
     *         grow longer than the longest codeword in the codebook
     */
    private static void decodeText(BufferedReader in, Writer out, Map<String, String> codewordToWord)
            throws IOException {

        int maxCodeLen = 0;
        for (String codeword : codewordToWord.keySet())
            maxCodeLen = Math.max(codeword.length(), maxCodeLen);

        StringBuilder code = new StringBuilder();  // Current partial codeword being accumulated
        while (true) {
            int c = in.read();
            if (c == -1) {
                if (code.length() != 0)
                    throw new RuntimeException("Unfinished codeword: " + code.toString());
                break;

            } else if ('A' <= c && c <= 'Z' || 'a' <= c && c <= 'z') {
                code.append((char)c);
                String word = codewordToWord.get(code.toString());
                if (word != null) {
                    out.write(word);
                    code.setLength(0);
                } else if (code.length() > maxCodeLen) {
                    // This is not necessarily the shortest undecodable prefix of the current codeword
                    throw new RuntimeException("Undecodable codeword prefix: " + code.toString());
                }

            } else {  // Symbol
                if (code.length() != 0)
                    throw new RuntimeException("Unfinished codeword: " + code.toString());
                out.write((char)c);
            }
        }
    }

}