diff --git a/.travis.yml b/.travis.yml
index c6a4d3f7..a4cf4957 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -3,7 +3,7 @@ language: python
dist: precise
python:
- - "3.6"
+ - "3.6.5"
before_install:
- export TZ=Europe/Brussels
@@ -19,6 +19,7 @@ install:
- pip freeze
# Install and compile libsvm and liblinear
- sudo apt-get install -y build-essential
+ - pip install translate
- git clone https://github.com/cjlin1/libsvm
- cd libsvm; make lib; sudo cp libsvm.so.2 /lib; sudo ln -s /lib/libsvm.so.2 /lib/libsvm.so; cd ..
- git clone https://github.com/cjlin1/liblinear
diff --git a/examples/09-malayalam/malayalam news classification/Readme.md b/examples/09-malayalam/malayalam news classification/Readme.md
new file mode 100644
index 00000000..61ce3d75
--- /dev/null
+++ b/examples/09-malayalam/malayalam news classification/Readme.md
@@ -0,0 +1,14 @@
+Malayalam News Classification
+=============================
+
+This example shows you how to do a search query for online news in malayalam language and the search results are classified into :
+Buisiness, entertainment,sports,Kerala,India.
+
Here Google is the used search engine.Open the code sample pattern_news.py and enter a search keyword in malayalam.
+
+
Before running the program, download the folder News from the following link and use it in the same folder as the sample code
+
+
+[Download Link to the news file](https://drive.google.com/open?id=1HPtrsoL9cX70rZ31lWgmPCjdbZfN_zjG)
+
+
+
diff --git a/examples/09-malayalam/malayalam news classification/pattern_news.py b/examples/09-malayalam/malayalam news classification/pattern_news.py
new file mode 100644
index 00000000..516b3877
--- /dev/null
+++ b/examples/09-malayalam/malayalam news classification/pattern_news.py
@@ -0,0 +1,26 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Created on Tue Apr 2 02:17:22 2019
+
+@author: abhijithneilabraham
+"""
+
+
+from pattern.web import Bing, SEARCH, plaintext,Google
+from ulmfit import ULMFiT
+engine = Google(license=key)
+searched=[]
+search_key='സഞ്ജു സാംസൺ'
+
+for result in engine.search(search_key, type=SEARCH, start=1):
+ print(repr(plaintext(result.text)))
+ searched.append(repr(plaintext(result.text)))
+print(len(searched))
+
+model = ULMFiT("news/")
+for i in searched:
+ x=model.predict(i)
+
+ print(x['intent'])
+
diff --git a/examples/09-malayalam/malayalam news classification/ulmfit.py b/examples/09-malayalam/malayalam news classification/ulmfit.py
new file mode 100644
index 00000000..cb84300e
--- /dev/null
+++ b/examples/09-malayalam/malayalam news classification/ulmfit.py
@@ -0,0 +1,172 @@
+import numpy as np
+from fastai.text import *
+from fastai.lm_rnn import get_rnn_classifer
+import html
+from nltk import word_tokenize
+
+
+class Tokenizer():
+ def __init__(self, lang='en'):
+ pass
+
+ def spacy_tok(self,x):
+ return word_tokenize(x)
+
+ def proc_text(self, s):
+ return self.spacy_tok(s)
+
+ @staticmethod
+ def proc_all(ss, lang):
+ tok = Tokenizer(lang)
+ return [tok.proc_text(s) for s in ss]
+
+ @staticmethod
+ def proc_all_mp(ss, lang='en'):
+ ncpus = num_cpus()//2
+ with ProcessPoolExecutor(ncpus) as e:
+ return sum(e.map(Tokenizer.proc_all, ss, [lang]*len(ss)), [])
+
+
+class ULMFiT:
+
+ def __init__(self,model: str):
+ model_path = Path(model)
+ itos_filename = model_path/"news_lm"/"tmp"/'itos.pkl'
+ trained_classifier_filename = model_path/'models'/'clas_2.h5'
+ label2index = model_path/"news_clas"/"l2i.npy"
+ self.l2i = {v:k for k,v in np.load(label2index).item().items()}
+ self.stoi, self.model = self.load_model(itos_filename, trained_classifier_filename)
+ self.re1 = re.compile(r' +')
+
+ def load_model(self,itos_filename, classifier_filename):
+ """Load the classifier and int to string mapping
+
+ Args:
+ itos_filename (str): The filename of the int to string mapping file (usually called itos.pkl)
+ classifier_filename (str): The filename of the trained classifier
+
+ Returns:
+ string to int mapping, trained classifer model
+ """
+
+ # load the int to string mapping file
+ itos = pickle.load(Path(itos_filename).open('rb'))
+ # turn it into a string to int mapping (which is what we need)
+ stoi = collections.defaultdict(lambda:0, {str(v):int(k) for k,v in enumerate(itos)})
+
+ # these parameters aren't used, but this is the easiest way to get a model
+ bptt,em_sz,nh,nl = 70,400,1150,3
+ dps = np.array([0.4,0.5,0.05,0.3,0.4])*0.5
+ num_classes = len(self.l2i) # this is the number of classes we want to predict
+ vs = len(itos)
+
+ model = get_rnn_classifer(bptt, 20*70, num_classes, vs, emb_sz=em_sz, n_hid=nh, n_layers=nl, pad_token=1,
+ layers=[em_sz*3, 50, num_classes], drops=[dps[4], 0.1],
+ dropouti=dps[0], wdrop=dps[1], dropoute=dps[2], dropouth=dps[3])
+
+ # load the trained classifier
+ model.load_state_dict(torch.load(classifier_filename, map_location=lambda storage, loc: storage))
+
+ # put the classifier into evaluation mode
+ model.reset()
+ model.eval()
+
+ return stoi, model
+
+
+ def softmax(self,x):
+ '''
+ Numpy Softmax, via comments on https://gist.github.com/stober/1946926
+
+ >>> res = softmax(np.array([0, 200, 10]))
+ >>> np.sum(res)
+ 1.0
+ >>> np.all(np.abs(res - np.array([0, 1, 0])) < 0.0001)
+ True
+ >>> res = softmax(np.array([[0, 200, 10], [0, 10, 200], [200, 0, 10]]))
+ >>> np.sum(res, axis=1)
+ array([ 1., 1., 1.])
+ >>> res = softmax(np.array([[0, 200, 10], [0, 10, 200]]))
+ >>> np.sum(res, axis=1)
+ array([ 1., 1.])
+ '''
+ if x.ndim == 1:
+ x = x.reshape((1, -1))
+ max_x = np.max(x, axis=1).reshape((-1, 1))
+ exp_x = np.exp(x - max_x)
+ return exp_x / np.sum(exp_x, axis=1).reshape((-1, 1))
+
+ def fixup(self, x):
+
+ x = x.replace('#39;', "'").replace('amp;', '&').replace('#146;', "'").replace(
+ 'nbsp;', ' ').replace('#36;', '$').replace('\\n', "\n").replace('quot;', "'").replace(
+ '
', "\n").replace('\\"', '"').replace('','u_n').replace(' @.@ ','.').replace(
+ ' @-@ ','-').replace('\\', ' \\ ').replace('\u200d','').replace('\xa0',' ').replace(
+ '\u200c','').replace('“',' ').replace('”',' ').replace('"',' ').replace('\u200b','')
+ x = re.sub('[\(\[].*?[\)\]]', '', x)
+ x = re.sub('<[^<]+?>', '', x)
+ x = re.sub('[A-Za-z]+','ENG ', x)
+ x = re.sub(r'\d+.?(\d+)?','NUM ',x).replace("(","").replace(")","")
+ return self.re1.sub(' ', html.unescape(x))
+
+ def predict_text(self,stoi, model, text):
+ """Do the actual prediction on the text using the
+ model and mapping files passed
+ """
+
+ # prefix text with tokens:
+ # xbos: beginning of sentence
+ # xfld 1: we are using a single field here
+ input_str = self.fixup(text)
+# input_str = re.sub('[A-Za-z]+','ENG ', input_str)
+# input_str = re.sub(r'\d+.?(\d+)?','NUM ',input_str).replace("(","").replace(")","")
+
+ # predictions are done on arrays of input.
+ # We only have a single input, so turn it into a 1x1 array
+ texts = [input_str]
+
+ # tokenize using the fastai wrapper around spacy
+ tok = Tokenizer().proc_text(input_str)
+
+ # turn into integers for each word
+ encoded = [stoi[p] for p in tok]
+# print(encoded)
+ # we want a [x,1] array where x is the number
+ # of words inputted (including the prefix tokens)
+ ary = np.reshape(np.array(encoded),(-1,1))
+
+ # turn this array into a tensor
+ tensor = torch.from_numpy(ary)
+
+ # wrap in a torch Variable
+ variable = Variable(tensor)
+
+ # do the predictions
+ predictions = model(variable)
+
+ # convert back to numpy
+ numpy_preds = predictions[0].data.numpy()
+
+ return self.softmax(numpy_preds[0])[0], input_str
+
+ def predict(self,text):
+ intent = {}
+ output, fixed_text = self.predict_text(self.stoi, self.model, text)
+ intent_ranking = []
+ for i, out in enumerate(output):
+ temp = {"confidence": float(format(out, 'f')), "name": self.l2i[i]}
+ intent_ranking.append(temp)
+ intent_ranking = sorted(intent_ranking, key=lambda e: e['confidence'], reverse=True)
+ intent.update({
+ "intent": intent_ranking.pop(0),
+ "intent_ranking": intent_ranking
+ })
+ intent.update({"processed_text": fixed_text})
+ return intent#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Created on Mon Apr 1 00:54:22 2019
+
+@author: abhijithneilabraham
+"""
+
diff --git a/pattern/web/__init__.py b/pattern/web/__init__.py
index c76cea88..78484a45 100644
--- a/pattern/web/__init__.py
+++ b/pattern/web/__init__.py
@@ -16,6 +16,7 @@
from builtins import str, bytes, dict, int, chr
from builtins import map, filter, zip
from builtins import object, range, next
+from translate import Translator
from .utils import get_url_query, get_form_action, stringify_values, json_iter_parse
@@ -2146,6 +2147,22 @@ def f(v):
else:
self._pagination[k] = id
return results
+
+
+
+ def translated(self,lang,query):
+ trans_results=self.search(query, start=1, count=10)
+ translator= Translator(to_lang=lang)
+ translation = translator.translate(trans_results)
+ return translation
+
+ '''
+ This translated takes the results from a search query and translates it to the language
+ specified by the lang keyword.
+ usage===>Twitter.translated("German","cat")
+
+ '''
+
def profile(self, query, start=1, count=10, **kwargs):
""" Returns a list of results for the given author id, alias or search query.