LibJF/libjf-translate-v1/src/main/java/io/gitlab/jfronny/libjf/translate/impl/google/GoogleTranslateService.java

104 lines
4.6 KiB
Java

package io.gitlab.jfronny.libjf.translate.impl.google;
import io.gitlab.jfronny.commons.http.client.HttpClient;
import io.gitlab.jfronny.libjf.translate.api.TranslateException;
import io.gitlab.jfronny.libjf.translate.impl.AbstractTranslateService;
import org.apache.commons.lang3.StringEscapeUtils;
import java.io.IOException;
import java.net.URISyntaxException;
import java.net.URLEncoder;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Translate service that scrapes the {@code translate.google.com/m} web pages.
 *
 * <p>The set of supported languages is read once, in the constructor, from the
 * language picker page; translations are extracted from the result page with a
 * regular expression. Both rely on the exact markup of those pages.
 */
public class GoogleTranslateService extends AbstractTranslateService<GoogleTranslateLanguage> {
    /** Name reported by {@link #getName()}. */
    public static final String NAME = "Google";
    /** Lazily created shared instance, see {@link #get()}. */
    private static GoogleTranslateService INSTANCE;
    /** Captures the text content of the {@code result-container} element of a result page. */
    private static final Pattern TRANSLATION_RESULT = Pattern.compile("class=\"result-container\">([^<]*)</div>", Pattern.MULTILINE);
    /** Captures (1) the language identifier and (2) the display name of one entry of the language picker page. */
    private static final Pattern LANGUAGE_KEY = Pattern.compile("<div class=\"language-item\"><a href=\"\\./m\\?sl&amp;tl=([a-zA-Z\\-]+)&amp;hl=[a-zA-Z\\-]+\">([^<]+)</a></div>", Pattern.MULTILINE);
    /** Immutable mapping from language identifier to language, filled by the constructor. */
    private final Map<String, GoogleTranslateLanguage> knownLanguages;

    /**
     * Returns the shared instance, creating it on first use.
     *
     * <p>Creation performs an HTTP request (see the constructor). This method is not
     * synchronized, so concurrent first calls may each construct an instance.
     *
     * @return the shared service instance
     * @throws URISyntaxException propagated from the HTTP client
     * @throws IOException if the request fails or no languages could be parsed
     */
    public static GoogleTranslateService get() throws URISyntaxException, IOException {
        if (INSTANCE == null) {
            INSTANCE = new GoogleTranslateService();
        }
        return INSTANCE;
    }

    /**
     * Downloads the language picker page and collects every language entry found in it.
     *
     * @throws URISyntaxException propagated from the HTTP client
     * @throws IOException if the request fails or the page contains no recognizable language entry
     */
    private GoogleTranslateService() throws URISyntaxException, IOException {
        Map<String, GoogleTranslateLanguage> detected = new HashMap<>();
        Matcher matcher = LANGUAGE_KEY.matcher(HttpClient.get("https://translate.google.com/m?mui=tl").sendString());
        while (matcher.find()) {
            String id = matcher.group(1);
            String name = matcher.group(2);
            detected.put(id, new GoogleTranslateLanguage(name, id));
        }
        if (detected.isEmpty()) {
            throw new IOException("Could not detect languages, Google likely changed the site. Please inform the maintainer of LibJF");
        }
        this.knownLanguages = Map.copyOf(detected);
    }

    @Override
    protected GoogleTranslateLanguage getAutoDetectLang() {
        return GoogleTranslateLanguage.AUTO_DETECT;
    }

    /**
     * Requests the result page for the given text and extracts the translation from it.
     *
     * @param textToTranslate the text to translate; must not be {@code null}
     * @param translateFrom source language
     * @param translateTo target language
     * @return the translated text with HTML entities unescaped
     * @throws Exception a {@link TranslateException} if the text is {@code null}, the request fails,
     *         or the result page cannot be parsed; the underlying failure is kept as the cause
     */
    @Override
    protected String performTranslate(String textToTranslate, GoogleTranslateLanguage translateFrom, GoogleTranslateLanguage translateTo) throws Exception {
        if (textToTranslate == null) {
            // Fail early with a clear message instead of a NullPointerException from the matcher
            throw new TranslateException("Could not translate: no text was provided");
        }
        String pageSource = "";
        try {
            pageSource = getPageSource(textToTranslate, translateFrom.getIdentifier(), translateTo.getIdentifier());
            Matcher matcher = TRANSLATION_RESULT.matcher(pageSource);
            if (matcher.find()) {
                String match = matcher.group(1);
                if (match != null && !match.isEmpty()) {
                    return StringEscapeUtils.unescapeHtml4(match); //TODO use commons-text once that is shipped with Minecraft
                }
            }
            throw new TranslateException("Could not translate \"" + textToTranslate + "\": result page couldn't be parsed");
        } catch (Exception e) {
            throw describeFailure(pageSource, e);
        }
    }

    /**
     * Builds the exception reported for a failed translation, dumping the received page
     * to a temporary file when there is one so the failure can be inspected.
     *
     * @param pageSource the page received so far; {@code null} or empty if nothing was received
     * @param cause the failure that interrupted the translation; always kept as the cause
     * @return the exception to throw
     */
    private static TranslateException describeFailure(String pageSource, Exception cause) {
        if (pageSource == null || pageSource.isEmpty()) {
            // Nothing was received, so a dump would only leave an empty temporary file behind
            return new TranslateException("Could not translate string: no result page was received", cause);
        }
        try {
            Path dump = Files.createTempFile("translater-pagedump-", ".html").toAbsolutePath();
            Files.writeString(dump, pageSource);
            return new TranslateException("Could not translate string, see dumped page at " + dump, cause);
        } catch (IOException ioe) {
            // Keep the original failure as the cause; the dump problem is only secondary
            TranslateException failure = new TranslateException("Could not translate string and the page could not be dumped", cause);
            failure.addSuppressed(ioe);
            return failure;
        }
    }

    /**
     * Always returns the auto-detect language; no detection request is made.
     */
    @Override
    public GoogleTranslateLanguage detect(String text) throws TranslateException {
        return GoogleTranslateLanguage.AUTO_DETECT;
    }

    /**
     * Looks up a language by its identifier.
     *
     * @param lang language identifier, may be {@code null}
     * @return the matching language, or the auto-detect language if the identifier is
     *         {@code null} or unknown
     */
    @Override
    public GoogleTranslateLanguage parseLang(String lang) {
        if (lang == null) {
            // The map created by Map.copyOf rejects null keys, even for lookups
            return GoogleTranslateLanguage.AUTO_DETECT;
        }
        return knownLanguages.getOrDefault(lang, GoogleTranslateLanguage.AUTO_DETECT);
    }

    /**
     * Returns a new mutable list of all known languages, excluding the auto-detect language.
     */
    @Override
    public List<GoogleTranslateLanguage> getAvailableLanguages() {
        List<GoogleTranslateLanguage> langs = new ArrayList<>(knownLanguages.values());
        langs.remove(GoogleTranslateLanguage.AUTO_DETECT);
        return langs;
    }

    @Override
    public String getName() {
        return NAME;
    }

    /**
     * Downloads the result page for a translation request.
     *
     * @param textToTranslate text to translate; it is trimmed and URL-encoded as UTF-8
     * @param translateFrom identifier of the source language, inserted into the URL as is
     * @param translateTo identifier of the target language, inserted into the URL as is
     * @return the body of the response, or {@code null} if {@code textToTranslate} is {@code null}
     * @throws URISyntaxException propagated from the HTTP client
     * @throws IOException propagated from the HTTP client
     */
    private static String getPageSource(String textToTranslate, String translateFrom, String translateTo) throws URISyntaxException, IOException {
        if (textToTranslate == null) {
            return null;
        }
        String pageUrl = String.format("https://translate.google.com/m?hl=en&sl=%s&tl=%s&ie=UTF-8&prev=_m&q=%s",
                translateFrom, translateTo, URLEncoder.encode(textToTranslate.trim(), StandardCharsets.UTF_8));
        return HttpClient.get(pageUrl).sendString();
    }
}