104 lines
4.6 KiB
Java
104 lines
4.6 KiB
Java
package io.gitlab.jfronny.libjf.translate.impl.google;
|
|
|
|
import io.gitlab.jfronny.commons.http.client.HttpClient;
|
|
import io.gitlab.jfronny.libjf.translate.api.TranslateException;
|
|
import io.gitlab.jfronny.libjf.translate.impl.AbstractTranslateService;
|
|
import org.apache.commons.lang3.StringEscapeUtils;
|
|
|
|
import java.io.IOException;
|
|
import java.net.URISyntaxException;
|
|
import java.net.URLEncoder;
|
|
import java.nio.charset.StandardCharsets;
|
|
import java.nio.file.Files;
|
|
import java.nio.file.Path;
|
|
import java.util.*;
|
|
import java.util.regex.Matcher;
|
|
import java.util.regex.Pattern;
|
|
|
|
public class GoogleTranslateService extends AbstractTranslateService<GoogleTranslateLanguage> {
|
|
public static final String NAME = "Google";
|
|
private static GoogleTranslateService INSTANCE;
|
|
private static final Pattern TRANSLATION_RESULT = Pattern.compile("class=\"result-container\">([^<]*)</div>", Pattern.MULTILINE);
|
|
private static final Pattern LANGUAGE_KEY = Pattern.compile("<div class=\"language-item\"><a href=\"\\./m\\?sl&tl=([a-zA-Z\\-]+)&hl=[a-zA-Z\\-]+\">([^<]+)</a></div>", Pattern.MULTILINE);
|
|
private final Map<String, GoogleTranslateLanguage> knownLanguages;
|
|
|
|
public static GoogleTranslateService get() throws URISyntaxException, IOException {
|
|
if (INSTANCE == null) {
|
|
INSTANCE = new GoogleTranslateService();
|
|
}
|
|
return INSTANCE;
|
|
}
|
|
|
|
private GoogleTranslateService() throws URISyntaxException, IOException {
|
|
Map<String, GoogleTranslateLanguage> knownLanguages = new HashMap<>();
|
|
Matcher matcher = LANGUAGE_KEY.matcher(HttpClient.get("https://translate.google.com/m?mui=tl").sendString());
|
|
while (matcher.find()) {
|
|
String id = matcher.group(1);
|
|
String name = matcher.group(2);
|
|
knownLanguages.put(id, new GoogleTranslateLanguage(name, id));
|
|
}
|
|
if (knownLanguages.isEmpty())
|
|
throw new IOException("Could not detect languages, Google likely changed the site. Please inform the maintainer of LibJF");
|
|
this.knownLanguages = Map.copyOf(knownLanguages);
|
|
}
|
|
|
|
@Override
|
|
protected GoogleTranslateLanguage getAutoDetectLang() {
|
|
return GoogleTranslateLanguage.AUTO_DETECT;
|
|
}
|
|
|
|
@Override
|
|
protected String performTranslate(String textToTranslate, GoogleTranslateLanguage translateFrom, GoogleTranslateLanguage translateTo) throws Exception {
|
|
String pageSource = "";
|
|
try {
|
|
pageSource = getPageSource(textToTranslate, translateFrom.getIdentifier(), translateTo.getIdentifier());
|
|
Matcher matcher = TRANSLATION_RESULT.matcher(pageSource);
|
|
if (matcher.find()) {
|
|
String match = matcher.group(1);
|
|
if (match != null && !match.isEmpty()) {
|
|
return StringEscapeUtils.unescapeHtml4(match); //TODO use commons-text once that is shipped with Minecraft
|
|
}
|
|
}
|
|
throw new TranslateException("Could not translate \"" + textToTranslate + "\": result page couldn't be parsed");
|
|
} catch (Exception e) {
|
|
try {
|
|
Path p = Files.createTempFile("translater-pagedump-", ".html").toAbsolutePath();
|
|
Files.writeString(p, pageSource);
|
|
throw new TranslateException("Could not translate string, see dumped page at " + p, e);
|
|
} catch (IOException ioe) {
|
|
throw new TranslateException("Could not translate string and the page could not be dumped", ioe);
|
|
}
|
|
}
|
|
}
|
|
|
|
@Override
|
|
public GoogleTranslateLanguage detect(String text) throws TranslateException {
|
|
return GoogleTranslateLanguage.AUTO_DETECT;
|
|
}
|
|
|
|
@Override
|
|
public GoogleTranslateLanguage parseLang(String lang) {
|
|
return knownLanguages.getOrDefault(lang, GoogleTranslateLanguage.AUTO_DETECT);
|
|
}
|
|
|
|
@Override
|
|
public List<GoogleTranslateLanguage> getAvailableLanguages() {
|
|
List<GoogleTranslateLanguage> langs = new ArrayList<>(knownLanguages.values());
|
|
langs.remove(GoogleTranslateLanguage.AUTO_DETECT);
|
|
return langs;
|
|
}
|
|
|
|
@Override
|
|
public String getName() {
|
|
return NAME;
|
|
}
|
|
|
|
private static String getPageSource(String textToTranslate, String translateFrom, String translateTo) throws URISyntaxException, IOException {
|
|
if (textToTranslate == null)
|
|
return null;
|
|
String pageUrl = String.format("https://translate.google.com/m?hl=en&sl=%s&tl=%s&ie=UTF-8&prev=_m&q=%s",
|
|
translateFrom, translateTo, URLEncoder.encode(textToTranslate.trim(), StandardCharsets.UTF_8));
|
|
return HttpClient.get(pageUrl).sendString();
|
|
}
|
|
}
|