From addd7afb67b410e84e2f40373457acddf2ca8a95 Mon Sep 17 00:00:00 2001
From: Abhijith Neil Abraham <abhijithneilabrahampk@gmail.com>
Date: Tue, 2 Apr 2019 17:44:59 +0530
Subject: [PATCH 01/12] uploading the example code

---
 .../malayalam news classification/pattern.py  |  25 +++
 .../malayalam news classification/ulmfit.py   | 172 ++++++++++++++++++
 2 files changed, 197 insertions(+)
 create mode 100644 examples/09-malayalam/malayalam news classification/pattern.py
 create mode 100644 examples/09-malayalam/malayalam news classification/ulmfit.py

diff --git a/examples/09-malayalam/malayalam news classification/pattern.py b/examples/09-malayalam/malayalam news classification/pattern.py
new file mode 100644
index 00000000..2e7d8b55
--- /dev/null
+++ b/examples/09-malayalam/malayalam news classification/pattern.py	
@@ -0,0 +1,25 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Created on Tue Apr  2 02:17:22 2019
+
+@author: abhijithneilabraham
+"""
+
+
+from pattern.web import Bing, SEARCH, plaintext,Google
+from ulmfit import ULMFiT
+engine = Google(license='AIzaSyCND8YQhyxQZU1E4y4gCzg8V61NQ61BYtw')
+searched=[]
+
+for result in engine.search('സഞ്ജു സാംസൺ', type=SEARCH, start=1):
+    print(repr(plaintext(result.text)))
+    searched.append(repr(plaintext(result.text)))
+print(len(searched))    
+
+model = ULMFiT("news/")
+for i in searched:
+    x=model.predict(i)
+    
+    print(x['intent'])
+    
\ No newline at end of file
diff --git a/examples/09-malayalam/malayalam news classification/ulmfit.py b/examples/09-malayalam/malayalam news classification/ulmfit.py
new file mode 100644
index 00000000..cb84300e
--- /dev/null
+++ b/examples/09-malayalam/malayalam news classification/ulmfit.py	
@@ -0,0 +1,172 @@
+import numpy as np
+from fastai.text import *
+from fastai.lm_rnn import get_rnn_classifer
+import html
+from nltk import word_tokenize
+
+
+class Tokenizer():
+    def __init__(self, lang='en'):
+        """NLTK-backed tokenizer; lang is accepted but not used by any method here."""
+
+    def spacy_tok(self, x):
+        return word_tokenize(x)  # NLTK word_tokenize, despite the spaCy-style method name
+
+    def proc_text(self, s):
+        return self.spacy_tok(s)
+
+    @staticmethod
+    def proc_all(ss, lang):
+        tok = Tokenizer(lang)
+        return [tok.proc_text(s) for s in ss]
+
+    @staticmethod
+    def proc_all_mp(ss, lang='en'):
+        # Each item of ss is a list of texts handed to proc_all in a worker process.
+        # num_cpus()//2 is 0 on a single-CPU machine and a 0-worker pool raises ValueError.
+        with ProcessPoolExecutor(max(1, num_cpus()//2)) as e:
+            return sum(e.map(Tokenizer.proc_all, ss, [lang]*len(ss)), [])
+
+
+class ULMFiT:
+
+    def __init__(self, model: str):
+        """Load the label map, the vocabulary and the trained classifier from the directory `model`."""
+        model_path = Path(model)
+        # l2i.npy stores a pickled dict: NumPy >= 1.16.3 refuses it without allow_pickle=True (trusted files only).
+        labels = np.load(model_path/"news_clas"/"l2i.npy", allow_pickle=True).item()
+        self.l2i = {v: k for k, v in labels.items()}  # keys and values swapped; predict() looks names up by index
+        self.stoi, self.model = self.load_model(model_path/"news_lm"/"tmp"/'itos.pkl', model_path/'models'/'clas_2.h5')
+        self.re1 = re.compile(r'  +')
+
+    def load_model(self,itos_filename, classifier_filename):
+        """Load the classifier and int to string mapping
+
+        Args:
+            itos_filename (str): The filename of the int to string mapping file (usually called itos.pkl)
+            classifier_filename (str): The filename of the trained classifier
+
+        Returns:
+            string to int mapping (unknown strings map to 0), trained classifier model
+        """
+        # load the int to string mapping file; NOTE: unpickling runs code, so only load trusted files
+        with Path(itos_filename).open('rb') as f:
+            itos = pickle.load(f)
+        # turn it into a string to int mapping (which is what we need)
+        stoi = collections.defaultdict(lambda:0, {str(v):int(k) for k,v in enumerate(itos)})
+
+        # hyper-parameters used to construct the network before the trained weights are loaded into it
+        bptt,em_sz,nh,nl = 70,400,1150,3
+        dps = np.array([0.4,0.5,0.05,0.3,0.4])*0.5
+        num_classes = len(self.l2i) # number of classes to predict; self.l2i must be set before this call
+        vs = len(itos)
+
+        model = get_rnn_classifer(bptt, 20*70, num_classes, vs, emb_sz=em_sz, n_hid=nh, n_layers=nl, pad_token=1,
+                layers=[em_sz*3, 50, num_classes], drops=[dps[4], 0.1],
+                dropouti=dps[0], wdrop=dps[1], dropoute=dps[2], dropouth=dps[3])
+
+        # load the trained classifier (map_location keeps the tensors on the CPU)
+        model.load_state_dict(torch.load(classifier_filename, map_location=lambda storage, loc: storage))
+
+        # put the classifier into evaluation mode
+        model.reset()
+        model.eval()
+
+        return stoi, model
+
+
+    def softmax(self,x):
+        '''
+        Row-wise softmax of a NumPy array; a 1-D input is treated as one row.
+
+        The maximum of each row is subtracted before exponentiating, which
+        avoids overflow for large scores without changing the result. For
+        1-D or 2-D input the returned array is 2-D and every row sums to 1.
+
+        Adapted from the comments on https://gist.github.com/stober/1946926
+
+        >>> res = ULMFiT.softmax(None, np.array([0, 200, 10]))
+        >>> res.shape
+        (1, 3)
+        >>> bool(np.all(np.abs(res - np.array([0, 1, 0])) < 0.0001))
+        True
+        '''
+        scores = x.reshape((1, -1)) if x.ndim == 1 else x
+        row_max = scores.max(axis=1).reshape((-1, 1))
+        weights = np.exp(scores - row_max)
+        row_sum = weights.sum(axis=1).reshape((-1, 1))
+        return weights / row_sum
+
+    def fixup(self, x):
+        """Clean raw text: undo markup leftovers, drop bracketed spans and tags, mask ASCII words as 'ENG ' and numbers as 'NUM ', collapse spaces."""
+        x = x.replace('#39;', "'").replace('amp;', '&').replace('#146;', "'").replace(
+        'nbsp;', ' ').replace('#36;', '$').replace('\\n', "\n").replace('quot;', "'").replace(
+        '<br />', "\n").replace('\\"', '"').replace('<unk>','u_n').replace(' @.@ ','.').replace(
+        ' @-@ ','-').replace('\\', ' \\ ').replace('\u200d','').replace('\xa0',' ').replace(
+        '\u200c','').replace('“',' ').replace('”',' ').replace('"',' ').replace('\u200b','')
+        x = re.sub(r'[\(\[].*?[\)\]]', '', x)  # raw string: '\(' in a plain literal is an invalid escape sequence
+        x = re.sub('<[^<]+?>', '', x)
+        x = re.sub('[A-Za-z]+','ENG ', x)
+        x = re.sub(r'\d+.?(\d+)?','NUM ',x).replace("(","").replace(")","")  # NOTE(review): '.' is unescaped, matches any char
+        return self.re1.sub(' ', html.unescape(x))
+
+    def predict_text(self,stoi, model, text):
+        """Classify a single piece of text.
+
+        Args:
+            stoi: token -> vocabulary index mapping (see load_model)
+            model: the classifier returned by load_model
+            text (str): raw input text
+
+        Returns:
+            (1-D numpy array of class probabilities, the cleaned-up text)
+
+        Raises:
+            ValueError: if no tokens are left after cleaning the text
+        """
+        # clean the text first (note that no xbos/xfld prefix tokens are added)
+        input_str = self.fixup(text)
+
+        # tokenize with NLTK through the Tokenizer wrapper
+        tok = Tokenizer().proc_text(input_str)
+        if not tok:
+            raise ValueError("no tokens left to classify after preprocessing")
+
+        # turn into integers for each word (load_model's mapping sends unknown words to 0)
+        encoded = [stoi[p] for p in tok]
+
+        # we want a [x,1] array where x is the number of tokens; int64 is
+        # forced because the embedding lookup needs a LongTensor and NumPy's
+        # default integer is 32-bit on some platforms
+        ary = np.array(encoded, dtype=np.int64).reshape(-1, 1)
+
+        # wrap the tensor in a torch Variable
+        variable = Variable(torch.from_numpy(ary))
+
+        # do the predictions
+        predictions = model(variable)
+
+        # convert back to numpy and normalise the scores of the single input
+        numpy_preds = predictions[0].data.numpy()
+        return self.softmax(numpy_preds[0])[0], input_str
+
+    def predict(self,text):
+        """Classify `text` and return a dict with the keys "intent" (the best
+        {"confidence", "name"} entry), "intent_ranking" (the remaining entries,
+        most confident first) and "processed_text" (the text after fixup)."""
+        output, fixed_text = self.predict_text(self.stoi, self.model, text)
+        # confidences are rounded to six decimals by the 'f' format
+        intent_ranking = sorted(
+            ({"confidence": float(format(out, 'f')), "name": self.l2i[i]} for i, out in enumerate(output)),
+            key=lambda e: e['confidence'], reverse=True)
+        return {
+            "intent": intent_ranking.pop(0),
+            "intent_ranking": intent_ranking,
+            "processed_text": fixed_text,
+        }
+# -*- coding: utf-8 -*-
+"""
+Created on Mon Apr  1 00:54:22 2019
+
+@author: abhijithneilabraham
+"""
+

From cc94efe968b22c22770f40ffc253e13304d1286b Mon Sep 17 00:00:00 2001
From: abhijithneilabraham <abhijithneilabrahampk@gmail.com>
Date: Tue, 2 Apr 2019 18:09:34 +0530
Subject: [PATCH 02/12] Create Readme.md

---
 .../malayalam news classification/Readme.md          | 12 ++++++++++++
 1 file changed, 12 insertions(+)
 create mode 100644 examples/09-malayalam/malayalam news classification/Readme.md

diff --git a/examples/09-malayalam/malayalam news classification/Readme.md b/examples/09-malayalam/malayalam news classification/Readme.md
new file mode 100644
index 00000000..65ddfa6b
--- /dev/null
+++ b/examples/09-malayalam/malayalam news classification/Readme.md	
@@ -0,0 +1,12 @@
+Malayalam News Classification
+=============================
+
+This example shows you how to do a search query for online news in malayalam language and the search results are classified into :
+Buisiness, entertainment,sports,Kerala,India.
+<br>Here Google is the used search engine.Open the code sample <b>pattern.py</b> and enter a search keyword in malayalam.</br>
+
+<br>Before running the program, download the folder <b>News</b> from the following link and use it in the same folder as the sample code</br>
+<br>https://drive.google.com/open?id=1HPtrsoL9cX70rZ31lWgmPCjdbZfN_zjG</br>
+
+
+

From f21db91d7a45be5b0c5fa671d05126eda4e70146 Mon Sep 17 00:00:00 2001
From: abhijithneilabraham <abhijithneilabrahampk@gmail.com>
Date: Tue, 2 Apr 2019 18:11:25 +0530
Subject: [PATCH 03/12] Update Readme.md

---
 examples/09-malayalam/malayalam news classification/Readme.md | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/examples/09-malayalam/malayalam news classification/Readme.md b/examples/09-malayalam/malayalam news classification/Readme.md
index 65ddfa6b..4f15f797 100644
--- a/examples/09-malayalam/malayalam news classification/Readme.md	
+++ b/examples/09-malayalam/malayalam news classification/Readme.md	
@@ -6,7 +6,9 @@ Buisiness, entertainment,sports,Kerala,India.
 <br>Here Google is the used search engine.Open the code sample <b>pattern.py</b> and enter a search keyword in malayalam.</br>
 
 <br>Before running the program, download the folder <b>News</b> from the following link and use it in the same folder as the sample code</br>
-<br>https://drive.google.com/open?id=1HPtrsoL9cX70rZ31lWgmPCjdbZfN_zjG</br>
+<br>
+
+[Link to the model](https://drive.google.com/open?id=1HPtrsoL9cX70rZ31lWgmPCjdbZfN_zjG)</br>
 
 
 

From 39b2d9bd00658bb7e7dbcaaa90b37e6b454551c8 Mon Sep 17 00:00:00 2001
From: abhijithneilabraham <abhijithneilabrahampk@gmail.com>
Date: Tue, 2 Apr 2019 18:12:16 +0530
Subject: [PATCH 04/12] Update Readme.md

---
 examples/09-malayalam/malayalam news classification/Readme.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/09-malayalam/malayalam news classification/Readme.md b/examples/09-malayalam/malayalam news classification/Readme.md
index 4f15f797..0107b4bb 100644
--- a/examples/09-malayalam/malayalam news classification/Readme.md	
+++ b/examples/09-malayalam/malayalam news classification/Readme.md	
@@ -8,7 +8,7 @@ Buisiness, entertainment,sports,Kerala,India.
 <br>Before running the program, download the folder <b>News</b> from the following link and use it in the same folder as the sample code</br>
 <br>
 
-[Link to the model](https://drive.google.com/open?id=1HPtrsoL9cX70rZ31lWgmPCjdbZfN_zjG)</br>
+[Download Link to the news file](https://drive.google.com/open?id=1HPtrsoL9cX70rZ31lWgmPCjdbZfN_zjG)</br>
 
 
 

From 1c00df716d58a98b744f1790bb15bb72f159b47f Mon Sep 17 00:00:00 2001
From: abhijithneilabraham <abhijithneilabrahampk@gmail.com>
Date: Tue, 2 Apr 2019 18:20:25 +0530
Subject: [PATCH 05/12] Update pattern.py

---
 .../09-malayalam/malayalam news classification/pattern.py     | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/examples/09-malayalam/malayalam news classification/pattern.py b/examples/09-malayalam/malayalam news classification/pattern.py
index 2e7d8b55..8f629c0d 100644
--- a/examples/09-malayalam/malayalam news classification/pattern.py	
+++ b/examples/09-malayalam/malayalam news classification/pattern.py	
@@ -9,7 +9,7 @@
 
 from pattern.web import Bing, SEARCH, plaintext,Google
 from ulmfit import ULMFiT
-engine = Google(license='AIzaSyCND8YQhyxQZU1E4y4gCzg8V61NQ61BYtw')
+engine = Google(license=key)
 searched=[]
 
 for result in engine.search('സഞ്ജു സാംസൺ', type=SEARCH, start=1):
@@ -22,4 +22,4 @@
     x=model.predict(i)
     
     print(x['intent'])
-    
\ No newline at end of file
+    

From 5aaab49c0570ef8341e28d5ee0524a9cd2298182 Mon Sep 17 00:00:00 2001
From: abhijithneilabraham <abhijithneilabrahampk@gmail.com>
Date: Tue, 2 Apr 2019 18:21:18 +0530
Subject: [PATCH 06/12] Delete pattern.py

---
 .../malayalam news classification/pattern.py  | 25 -------------------
 1 file changed, 25 deletions(-)
 delete mode 100644 examples/09-malayalam/malayalam news classification/pattern.py

diff --git a/examples/09-malayalam/malayalam news classification/pattern.py b/examples/09-malayalam/malayalam news classification/pattern.py
deleted file mode 100644
index 8f629c0d..00000000
--- a/examples/09-malayalam/malayalam news classification/pattern.py	
+++ /dev/null
@@ -1,25 +0,0 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-"""
-Created on Tue Apr  2 02:17:22 2019
-
-@author: abhijithneilabraham
-"""
-
-
-from pattern.web import Bing, SEARCH, plaintext,Google
-from ulmfit import ULMFiT
-engine = Google(license=key)
-searched=[]
-
-for result in engine.search('സഞ്ജു സാംസൺ', type=SEARCH, start=1):
-    print(repr(plaintext(result.text)))
-    searched.append(repr(plaintext(result.text)))
-print(len(searched))    
-
-model = ULMFiT("news/")
-for i in searched:
-    x=model.predict(i)
-    
-    print(x['intent'])
-    

From a1512c526ee06e2e17376b7a2f55ccc57dc8ded4 Mon Sep 17 00:00:00 2001
From: abhijithneilabraham <abhijithneilabrahampk@gmail.com>
Date: Tue, 2 Apr 2019 18:21:52 +0530
Subject: [PATCH 07/12] Create pattern_news.py

---
 .../pattern_news.py                           | 25 +++++++++++++++++++
 1 file changed, 25 insertions(+)
 create mode 100644 examples/09-malayalam/malayalam news classification/pattern_news.py

diff --git a/examples/09-malayalam/malayalam news classification/pattern_news.py b/examples/09-malayalam/malayalam news classification/pattern_news.py
new file mode 100644
index 00000000..8f629c0d
--- /dev/null
+++ b/examples/09-malayalam/malayalam news classification/pattern_news.py	
@@ -0,0 +1,25 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Created on Tue Apr  2 02:17:22 2019
+
+@author: abhijithneilabraham
+"""
+
+
+from pattern.web import Bing, SEARCH, plaintext,Google
+from ulmfit import ULMFiT
+engine = Google(license=key)
+searched=[]
+
+for result in engine.search('സഞ്ജു സാംസൺ', type=SEARCH, start=1):
+    print(repr(plaintext(result.text)))
+    searched.append(repr(plaintext(result.text)))
+print(len(searched))    
+
+model = ULMFiT("news/")
+for i in searched:
+    x=model.predict(i)
+    
+    print(x['intent'])
+    

From 4fec119dd79ebad8d652a0a98493ba4d4b9ad5a2 Mon Sep 17 00:00:00 2001
From: abhijithneilabraham <abhijithneilabrahampk@gmail.com>
Date: Tue, 2 Apr 2019 18:22:05 +0530
Subject: [PATCH 08/12] Update Readme.md

---
 examples/09-malayalam/malayalam news classification/Readme.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/09-malayalam/malayalam news classification/Readme.md b/examples/09-malayalam/malayalam news classification/Readme.md
index 0107b4bb..61ce3d75 100644
--- a/examples/09-malayalam/malayalam news classification/Readme.md	
+++ b/examples/09-malayalam/malayalam news classification/Readme.md	
@@ -3,7 +3,7 @@ Malayalam News Classification
 
 This example shows you how to do a search query for online news in malayalam language and the search results are classified into :
 Buisiness, entertainment,sports,Kerala,India.
-<br>Here Google is the used search engine.Open the code sample <b>pattern.py</b> and enter a search keyword in malayalam.</br>
+<br>Here Google is the used search engine.Open the code sample <b>pattern_news.py</b> and enter a search keyword in malayalam.</br>
 
 <br>Before running the program, download the folder <b>News</b> from the following link and use it in the same folder as the sample code</br>
 <br>

From 2f4913562f004d89414c99dc14f88ba6e31ebe5f Mon Sep 17 00:00:00 2001
From: abhijithneilabraham <abhijithneilabrahampk@gmail.com>
Date: Tue, 2 Apr 2019 18:25:50 +0530
Subject: [PATCH 09/12] Update pattern_news.py

---
 .../09-malayalam/malayalam news classification/pattern_news.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/examples/09-malayalam/malayalam news classification/pattern_news.py b/examples/09-malayalam/malayalam news classification/pattern_news.py
index 8f629c0d..516b3877 100644
--- a/examples/09-malayalam/malayalam news classification/pattern_news.py	
+++ b/examples/09-malayalam/malayalam news classification/pattern_news.py	
@@ -11,8 +11,9 @@
 from ulmfit import ULMFiT
 engine = Google(license=key)
 searched=[]
+search_key='സഞ്ജു സാംസൺ'
 
-for result in engine.search('സഞ്ജു സാംസൺ', type=SEARCH, start=1):
+for result in engine.search(search_key, type=SEARCH, start=1):
     print(repr(plaintext(result.text)))
     searched.append(repr(plaintext(result.text)))
 print(len(searched))    

From cfd00716cf631d3f7955b0b2831892c3ed3bc91b Mon Sep 17 00:00:00 2001
From: Abhijith Neil Abraham <abhijithneilabrahampk@gmail.com>
Date: Wed, 3 Apr 2019 02:38:32 +0530
Subject: [PATCH 10/12] Update pattern/web/__init__.py

---
 pattern/web/__init__.py | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/pattern/web/__init__.py b/pattern/web/__init__.py
index c76cea88..78484a45 100644
--- a/pattern/web/__init__.py
+++ b/pattern/web/__init__.py
@@ -16,6 +16,7 @@
 from builtins import str, bytes, dict, int, chr
 from builtins import map, filter, zip
 from builtins import object, range, next
+from translate import Translator
 
 from .utils import get_url_query, get_form_action, stringify_values, json_iter_parse
 
@@ -2146,6 +2147,22 @@ def f(v):
             else:
                 self._pagination[k] = id
         return results
+    
+    
+    
+    def translated(self, lang, query):
+        """ Returns a list with the translated text of each search result for the given query.
+            The first 10 results of self.search(query) are used, and lang is passed
+            to translate.Translator as the target language (to_lang).
+            Each result is translated separately, because Translator.translate()
+            takes a string and not a list of results.
+            For example: Twitter().translated("German", "cat")
+        """
+        translator = Translator(to_lang=lang)
+        results = self.search(query, start=1, count=10)
+        # Result.text holds the plain message of a search result.
+        return [translator.translate(r.text) for r in results]
+        
 
     def profile(self, query, start=1, count=10, **kwargs):
         """ Returns a list of results for the given author id, alias or search query.

From ee557ecb992f67acaa76ef0d2d228b95983624e3 Mon Sep 17 00:00:00 2001
From: abhijithneilabraham <abhijithneilabrahampk@gmail.com>
Date: Wed, 3 Apr 2019 15:39:41 +0530
Subject: [PATCH 11/12] Update .travis.yml

---
 .travis.yml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.travis.yml b/.travis.yml
index c6a4d3f7..97a3514d 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -19,6 +19,7 @@ install:
   - pip freeze
   # Install and compile libsvm and liblinear
   - sudo apt-get install -y build-essential
+  - pip install translate
   - git clone https://github.com/cjlin1/libsvm
   - cd libsvm; make lib; sudo cp libsvm.so.2 /lib; sudo ln -s /lib/libsvm.so.2 /lib/libsvm.so; cd ..
   - git clone https://github.com/cjlin1/liblinear

From 881cfb08852b98e430a3c4716a3e2dc1cebc0860 Mon Sep 17 00:00:00 2001
From: abhijithneilabraham <abhijithneilabrahampk@gmail.com>
Date: Sun, 7 Apr 2019 10:51:28 +0530
Subject: [PATCH 12/12] Update .travis.yml

---
 .travis.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.travis.yml b/.travis.yml
index 97a3514d..a4cf4957 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -3,7 +3,7 @@ language: python
 dist: precise
 
 python:
-  - "3.6"
+  - "3.6.5"
 
 before_install:
   - export TZ=Europe/Brussels