Logo Questions Linux Laravel Mysql Ubuntu Git Menu
 

Google Translate TTS API blocked

Google implemented a captcha to block people from accessing the TTS translate API https://translate.google.com/translate_tts?ie=UTF-8&q=test&tl=zh-TW. I was using it in my mobile application. Now, it is not returning anything. How do I get around the captcha?

like image 422
ginsengtang Avatar asked Aug 17 '15 14:08

ginsengtang


3 Answers

Add the qualifier '&client=tw-ob' to the end of your query. https://translate.google.com/translate_tts?ie=UTF-8&q=test&tl=zh-TW&client=tw-ob

This answer no longer works consistently. Google will temporarily block your IP address if you send too many requests this way.

like image 153
ginsengtang Avatar answered Nov 08 '22 00:11

ginsengtang


there are 3 main issues:

  1. you must include "client" in your query string (client=t seems to work).
  2. (in case you are trying to retrieve it using AJAX) the Referer of the HTTP request must be https://translate.google.com/
  3. the "tk" field changes for every query and must be populated with a matching hash: tk = hash(q, TKK), where q is the text to be converted to speech and TKK is a variable in the global scope once translate.google.com has loaded (type 'window.TKK' in the console). See the hash function at the bottom of this reply (calcHash).

to summarize:

/**
 * Builds a Google Translate text-to-speech URL for the given text.
 *
 * @param {string} q - Text to be spoken (raw, not URL-encoded).
 * @param {string} tl - Target language code, e.g. 'it'.
 * @param {string} tkk - TKK value in "<index>.<key>" form, passed to calcHash.
 * @returns {string} The translate_tts URL including the computed `tk` token.
 */
function generateGoogleTTSLink(q, tl, tkk) {
    // The token is computed over the raw text, before any URL-encoding.
    const tk = calcHash(q, tkk);
    // Encode user-supplied values so spaces, '&', '#', '=' and non-ASCII
    // characters cannot break or alter the query string.
    const encodedText = encodeURIComponent(q);
    const encodedLang = encodeURIComponent(tl);
    return `https://translate.google.com/translate_tts?ie=UTF-8&total=1&idx=0&client=t&ttsspeed=1&tl=${encodedLang}&tk=${tk}&q=${encodedText}&textlen=${q.length}`;
}

// Example call: text 'ciao', language 'it', and a sample TKK value.
generateGoogleTTSLink('ciao', 'it', '410353.1336369826');
// "calcHash" is defined further down in this answer.

=> to get your hands on a TKK, you can open Google Translate website, then type "TKK" in developer tools' console (e.g.: "410353.1336369826").

NOTE that TKK value changes every hour, and so, old TKKs might get blocked at some point, and refreshing it may be necessary (although so far it seems like old keys can work for a LONG time).

if you DO wish to periodically refresh TKK, it can be automated pretty easily, but not if you're running your code from the browser.

you can find a full NodeJS implementation here: https://github.com/guyrotem/google-translate-server. it exposes a minimal TTS API (query, language), and is deployed to a free Heroku server, so you can test it online if you like.

/**
 * Applies a sequence of "shift, then combine" steps to a number.
 *
 * Each entry of opArray is a 3-character string:
 *   [0] combine operator: '+' adds the shifted value (result truncated to
 *       32 bits), anything else XORs it
 *   [1] shift direction: '+' is an unsigned right shift (>>>), anything
 *       else is a left shift (<<)
 *   [2] shift amount as a single hex digit, decoded by hexCharAsNumber
 *
 * @param {number} num - Starting value.
 * @param {string[]} opArray - Operation descriptors, applied in order.
 * @returns {number} The value after all steps have been applied.
 */
function shiftLeftOrRightThenSumOrXor(num, opArray) {
	let value = num;
	opArray.forEach((opString) => {
		const combineOp = opString[0];
		const shiftDirection = opString[1];
		const shiftAmount = hexCharAsNumber(opString[2]);

		let shifted;
		if (shiftDirection === '+') {
			shifted = value >>> shiftAmount;
		} else {
			shifted = value << shiftAmount;
		}

		if (combineOp === '+') {
			// '+' binds tighter than '&': the sum is masked, not the operand.
			value = (value + shifted) & 0xffffffff;
		} else {
			value = value ^ shifted;
		}
	});
	return value;
}

/**
 * Converts a single hexadecimal digit character to its numeric value.
 *
 * Accepts both lowercase and uppercase digits ('0'-'9', 'a'-'f', 'A'-'F').
 *
 * @param {string} xd - One hexadecimal digit.
 * @returns {number} The value 0-15, or NaN if xd is not a hex digit.
 */
function hexCharAsNumber(xd) {
	return Number.parseInt(xd, 16);
}

/**
 * Encodes a string as an array of UTF-8 byte values.
 *
 * A valid UTF-16 surrogate pair is combined into one code point and encoded
 * as 4 bytes. An unpaired surrogate falls through to the 3-byte branch and is
 * encoded as-is.
 *
 * @param {string} query - Text to encode.
 * @returns {number[]} Byte values (0-255), 1 to 4 per code point.
 */
function transformQuery(query) {
	const bytes = [];
	for (let i = 0; i < query.length; i++) {
		let code = query.charCodeAt(i);
		if (code < 0x80) {
			// 1 byte: 0xxxxxxx
			bytes.push(code);
		} else if (code < 0x800) {
			// 2 bytes: 110xxxxx 10xxxxxx
			bytes.push(code >> 6 | 0xC0, code & 0x3F | 0x80);
		} else if (
			(code & 0xFC00) === 0xD800 &&
			i + 1 < query.length &&
			(query.charCodeAt(i + 1) & 0xFC00) === 0xDC00
		) {
			// High + low surrogate: rebuild the code point (>= 0x10000),
			// consuming the low surrogate as well.
			code = 0x10000 + ((code & 0x03FF) << 10) + (query.charCodeAt(++i) & 0x03FF);
			// 4 bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
			bytes.push(
				code >> 18 | 0xF0,
				code >> 12 & 0x3F | 0x80,
				code >> 6 & 0x3F | 0x80,
				code & 0x3F | 0x80
			);
		} else {
			// 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx
			bytes.push(
				code >> 12 | 0xE0,
				code >> 6 & 0x3F | 0x80,
				code & 0x3F | 0x80
			);
		}
	}
	return bytes;
}

/**
 * Maps a hash value to a non-negative number and reduces it modulo 1,000,000.
 *
 * A negative input is reinterpreted as unsigned by keeping its low 31 bits
 * and adding 2^31; a non-negative input is used unchanged.
 *
 * @param {number} encondindRound2 - Hash value to normalize.
 * @returns {number} The normalized value modulo 1E6.
 */
function normalizeHash(encondindRound2) {
	const unsignedValue = encondindRound2 < 0
		? (encondindRound2 & 0x7fffffff) + 0x80000000
		: encondindRound2;
	return unsignedValue % 1E6;
}

/**
 * Computes the `tk` token for a query from a TKK value.
 *
 * @param {string} query - Text the token is computed for.
 * @param {string} windowTkk - TKK string in "<index>.<key>" form; a part that
 *   is missing or non-numeric is treated as 0.
 * @returns {string} Token formatted as "<hash>.<hash XOR index>".
 */
function calcHash(query, windowTkk) {
	// STEP 1: turn the query into an array of byte values.
	const queryBytes = transformQuery(query);

	// STEP 2: split the TKK into its numeric index and key parts.
	const tkkParts = windowTkk.split('.');
	const tkkIndex = Number(tkkParts[0]) || 0;
	const tkkKey = Number(tkkParts[1]) || 0;

	// STEP 3: starting from the TKK index, add each byte in turn and mix with
	// two shift-and-combine steps: add (acc << 10), then XOR (acc >>> 6).
	let mixed = tkkIndex;
	queryBytes.forEach((byte) => {
		mixed = shiftLeftOrRightThenSumOrXor(mixed + byte, ['+-a', '^+6']);
	});

	// STEP 4: three final shift-and-combine steps, then XOR with the TKK key.
	const finalized = shiftLeftOrRightThenSumOrXor(mixed, ['+-3', '^+b', '+-f']) ^ tkkKey;

	// STEP 5: make the value non-negative, reduce it, and format the token.
	const hash = normalizeHash(finalized);
	return `${hash}.${hash ^ tkkIndex}`;
}

// Usage example: compute the tk token for the text 'hola' with a sample TKK value.
var tk = calcHash('hola', '409837.2120040981');
console.log('tk=' + tk);
// Output reported by the author: tk=70528.480109
like image 35
Guy Rotem Avatar answered Nov 08 '22 01:11

Guy Rotem


You can also try this format :

  1. Pass q as the URL-encoded form of your text (in JavaScript you can use the encodeURI() function, or encodeURIComponent(), which is the safer choice for a query-string value; PHP has the rawurlencode() function).

  2. Pass tl as the short code of the language (for example, Bangla is bn).

Now try this :

https://translate.google.com.vn/translate_tts?ie=UTF-8&q=%E0%A6%A2%E0%A6%BE%E0%A6%95%E0%A6%BE+&tl=bn&client=tw-ob

like image 5
Arman Hakim Sagar Avatar answered Nov 08 '22 01:11

Arman Hakim Sagar