Regarding the program: all the PNGs are already extracted by a one-page Java program. The remaining problem is creating the msl files.
That is also already done, but it needs some additional research: the third argument in the "Smiley =" string must be corrected. A Unicode escape code can probably be used to compose it.
BTW, the code is as follows:
import org.apache.commons.codec.binary.Base64;
import org.apache.commons.io.FileUtils;
import org.jsoup.nodes.Document;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Element;
import java.io.*;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.text.DateFormat;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.HashMap;
import java.util.Map;
/**
 * Extracts embedded emoji images from a locally saved HTML emoji chart and writes, for each
 * configured image set ("protocol"), a directory of PNG files plus a matching {@code .msl} file.
 *
 * <p>All input and output lives under the relative {@code data/} directory:
 * <ul>
 *   <li>{@code data/Full Emoji Data.html} - the chart that is parsed;</li>
 *   <li>{@code data/<protocol>/NNNNN.png} - one decoded image per table row;</li>
 *   <li>{@code data/<protocol>.msl} - header lines followed by one {@code Smiley = ...}
 *       line per saved image.</li>
 * </ul>
 *
 * Created by bykov on 13.07.2015.
 */
public class EmojiProcessor {

    /** Prefix of the image {@code src} attribute that precedes the Base64 payload. */
    private static final String PNG_DATA_URI_PREFIX = "data:image/png;base64,";

    /** Zero-based table column holding the emoji character itself. */
    private static final int CHARACTER_COLUMN = 2;

    /** Width the row number is zero-padded to when building a PNG file name. */
    private static final int PNG_NAME_WIDTH = 5;

    /**
     * Image sets to export, keyed by name. Each entry maps {@code "name"} to the output
     * directory / msl base name and {@code "index"} to the zero-based table column that
     * holds that set's image.
     */
    private static final Map<String, HashMap<String, String>> protocols = new HashMap<>();

    static {
        registerProtocol("BW", "3");
        registerProtocol("Apple", "4");
        registerProtocol("Twit", "6");
    }

    /** Adds one image set with the given name and table column index to {@link #protocols}. */
    private static void registerProtocol(String name, String index) {
        HashMap<String, String> settings = new HashMap<String, String>();
        settings.put("name", name);
        settings.put("index", index);
        protocols.put(name, settings);
    }

    /**
     * Resets the output of every configured protocol, then walks every table row of the
     * chart, saving each available image and appending the matching msl record.
     *
     * @param args ignored
     * @throws java.io.IOException if the chart cannot be read or any output cannot be written
     */
    public static void main(String[] args) throws java.io.IOException {
        // Uncomment to use online version
        // Document doc = Jsoup.connect("http://unicode.org/emoji/charts/full-emoji-list.html").get();
        File input = new File("data/Full Emoji Data.html");
        Document doc = Jsoup.parse(input, "UTF-8");

        // Clean files and prepare msl files
        for (Map<String, String> protocol : protocols.values()) {
            String name = protocol.get("name");
            // cleanDirectory rejects a missing directory, so make sure it exists first;
            // createDirectories is a no-op when the directory is already there.
            Files.createDirectories(Paths.get("data/" + name));
            FileUtils.cleanDirectory(new File("data/" + name + "/"));
            Files.deleteIfExists(Paths.get("data/" + name + ".msl"));
            prepareMSLFiles(name);
        }

        // Parse html file, extract images and compile msl file
        for (Element table : doc.select("table")) {
            for (Element row : table.select("tr")) {
                int cellCount = row.children().size();
                // Skip rows too short to carry a number and a character, and header rows.
                if (cellCount <= CHARACTER_COLUMN || "Count".equals(row.child(0).text())) {
                    continue;
                }
                String rownumber = row.child(0).text();
                String character = row.child(CHARACTER_COLUMN).text();

                // For each table record save image and add record to msl file
                for (Map<String, String> protocol : protocols.values()) {
                    int index = Integer.parseInt(protocol.get("index"));
                    // A protocol may have no cell, or no image inside its cell, for this row.
                    if (index >= cellCount || row.child(index).children().isEmpty()) {
                        continue;
                    }
                    String name = protocol.get("name");
                    savePicture(name, getImageData(row, index), rownumber);
                    addMSLRecord(name, getPNGName(rownumber), character);
                }
            }
        }
    }

    /**
     * Decodes Base64 image data and saves it as {@code data/<protocol>/<padded name>.png}.
     *
     * @param protocol name of the image set; used as the output directory name
     * @param img      Base64-encoded image bytes
     * @param name     row number used to build the file name (see {@link #getPNGName})
     * @throws java.io.IOException if the file cannot be created or written
     */
    private static void savePicture(String protocol, String img, String name) throws java.io.IOException {
        byte[] data = Base64.decodeBase64(img);
        String filename = "data/" + protocol + "/" + getPNGName(name);
        // try-with-resources closes the stream even when the write fails
        try (OutputStream stream = new FileOutputStream(filename)) {
            stream.write(data);
        }
    }

    /**
     * Returns the {@code src} attribute of the first child of the given cell with the leading
     * {@code data:image/png;base64,} characters removed.
     *
     * @param row   table row to read from
     * @param index zero-based index of the cell within the row
     * @return the remainder of the {@code src} attribute after the prefix length
     */
    private static String getImageData(Element row, int index) {
        return row.child(index).child(0).attr("src").substring(PNG_DATA_URI_PREFIX.length());
    }

    /**
     * Builds a PNG file name by left-padding {@code name} with zeros to five characters and
     * appending {@code .png}. Any space already contained in {@code name} is also turned
     * into a zero, because padding is done with spaces that are then replaced.
     *
     * @param name row number text
     * @return padded file name, e.g. {@code 00042.png} for {@code "42"}
     */
    private static String getPNGName(String name) {
        return String.format("%" + PNG_NAME_WIDTH + "s", name).replace(' ', '0') + ".png";
    }

    /**
     * Appends one {@code Smiley = "<protocol>\<png>",0,"<character>"} line to
     * {@code data/<protocol>.msl}, encoded as UTF-8.
     *
     * @param protocol  name of the image set
     * @param pngname   file name of the picture inside the protocol directory
     * @param character emoji text taken from the table
     * @throws java.io.IOException if the msl file cannot be opened or written
     */
    private static void addMSLRecord(String protocol, String pngname, String character) throws java.io.IOException {
        String filename = "data/" + protocol + ".msl";
        try (OutputStream file = new FileOutputStream(filename, true);
             BufferedWriter out = new BufferedWriter(new OutputStreamWriter(file, "UTF-8"))) {
            // NOTE(review): the character is written verbatim; whether the consumer of the
            // msl file expects a different encoding of this third field is unconfirmed.
            out.write("Smiley = \"" + protocol + "\\" + pngname + "\",0,\"" + character + "\"");
            out.newLine();
        }
    }

    /**
     * Creates {@code data/<protocol>.msl} and writes its header lines (name, author, current
     * date, version, selection and window size) followed by an empty line. Existing content
     * of the file is replaced.
     *
     * @param protocol name of the image set
     * @throws java.io.IOException if the msl file cannot be created or written
     */
    private static void prepareMSLFiles(String protocol) throws java.io.IOException {
        DateFormat dateFormat = new SimpleDateFormat("yyyy/MM/dd");
        Date date = new Date();
        String filename = "data/" + protocol + ".msl";
        // Truncate rather than append so the header always starts the file.
        try (OutputStream file = new FileOutputStream(filename);
             BufferedWriter out = new BufferedWriter(new OutputStreamWriter(file, "UTF-8"))) {
            String[] headerLines = {
                "Name = \"" + protocol + " Emoji set\"",
                "Author = \"Pavel Bykov\"",
                "Date = \"" + dateFormat.format(date) + "\"",
                "Version = \"1.0\"",
                "SelectionSize = 24, 24",
                "WindowSize = 10, 8",
            };
            for (String line : headerLines) {
                out.write(line);
                out.newLine();
            }
            out.newLine();
        }
    }
}
The current results for the Twitter protocol are also attached; they are the smallest )