diff --git a/pattern/vector/svm/liblinear.py b/pattern/vector/svm/liblinear.py index e338a5a3..f17b3215 100644 --- a/pattern/vector/svm/liblinear.py +++ b/pattern/vector/svm/liblinear.py @@ -31,7 +31,7 @@ if sys.platform == 'win32': liblinear = CDLL(path.join(dirname, r'..\windows\liblinear.dll')) else: - liblinear = CDLL(path.join(dirname, '../liblinear.so.3')) + liblinear = CDLL(path.join(dirname, 'macos/liblinear-2.20/liblinear.so.3')) except: # For unix the prefix 'lib' is not considered. if find_library('linear'): @@ -39,7 +39,7 @@ elif find_library('liblinear'): liblinear = CDLL(find_library('liblinear')) else: - raise Exception('LIBLINEAR library not found.') + liblinear = CDLL(path.join(path.dirname(__file__), 'ubuntu/liblinear-2.20/liblinear.so.3')) L2R_LR = 0 L2R_L2LOSS_SVC_DUAL = 1 diff --git a/pattern/vector/svm/libsvm.py b/pattern/vector/svm/libsvm.py index 932919e3..a018550b 100644 --- a/pattern/vector/svm/libsvm.py +++ b/pattern/vector/svm/libsvm.py @@ -24,7 +24,8 @@ if sys.platform == 'win32': libsvm = CDLL(path.join(dirname, r'..\windows\libsvm.dll')) else: - libsvm = CDLL(path.join(dirname, '../libsvm.so.2')) + libsvm = CDLL(path.join(dirname, 'macos/libsvm-3.22/libsvm.so.2')) + except: # For unix the prefix 'lib' is not considered. 
if find_library('svm'): @@ -32,7 +33,8 @@ elif find_library('libsvm'): libsvm = CDLL(find_library('libsvm')) else: - raise Exception('LIBSVM library not found.') + libsvm = CDLL(path.join(path.dirname(__file__), 'ubuntu/libsvm-3.22/libsvm.so.2')) + C_SVC = 0 NU_SVC = 1 diff --git a/pattern/vector/svm/macos/liblinear-2.20/liblinear.so.3 b/pattern/vector/svm/macos/liblinear-2.20/liblinear.so.3 new file mode 100755 index 00000000..c2083ff1 Binary files /dev/null and b/pattern/vector/svm/macos/liblinear-2.20/liblinear.so.3 differ diff --git a/pattern/vector/svm/macos/libsvm-3.22/libsvm.so.2 b/pattern/vector/svm/macos/libsvm-3.22/libsvm.so.2 new file mode 100755 index 00000000..d65a52da Binary files /dev/null and b/pattern/vector/svm/macos/libsvm-3.22/libsvm.so.2 differ diff --git a/pattern/vector/svm/ubuntu/liblinear-2.20/liblinear.so.3 b/pattern/vector/svm/ubuntu/liblinear-2.20/liblinear.so.3 new file mode 100644 index 00000000..c703f547 Binary files /dev/null and b/pattern/vector/svm/ubuntu/liblinear-2.20/liblinear.so.3 differ diff --git a/pattern/vector/svm/ubuntu/libsvm-3.22/libsvm.so.2 b/pattern/vector/svm/ubuntu/libsvm-3.22/libsvm.so.2 new file mode 100644 index 00000000..874d89c4 Binary files /dev/null and b/pattern/vector/svm/ubuntu/libsvm-3.22/libsvm.so.2 differ diff --git a/pattern/web/__init__.py b/pattern/web/__init__.py index a2f26ba5..c2fd0319 100644 --- a/pattern/web/__init__.py +++ b/pattern/web/__init__.py @@ -1729,6 +1729,12 @@ def search(self, query, type=SEARCH, start=1, count=10, sort=RELEVANCY, size=Non - count: maximum 100. There is a limit of 150+ queries per 15 minutes. 
""" + + def f(v): + v = v.get('extended_tweet', {}).get('full_text', v.get('full_text', v.get('text', ''))) + return v + + if type != SEARCH: raise SearchEngineTypeError if not query or count < 1 or (isinstance(start, (int, float)) and start < 1): @@ -1748,6 +1754,7 @@ def search(self, query, type=SEARCH, start=1, count=10, sort=RELEVANCY, size=Non url.query = { "q": query, "max_id": id, + 'tweet_mode': 'extended', "count": min(count, 100) } # 2) Restrict location with geo=(latitude, longitude, radius). @@ -1778,7 +1785,7 @@ def search(self, query, type=SEARCH, start=1, count=10, sort=RELEVANCY, size=Non r = Result(url=None) r.id = self.format(x.get("id_str")) r.url = self.format(TWITTER_STATUS % (x.get("user", {}).get("screen_name"), x.get("id_str"))) - r.text = self.format(x.get("text")) + r.text = self.format(f(x)) r.date = self.format(x.get("created_at")) r.author = self.format(x.get("user", {}).get("screen_name")) r.language = self.format(x.get("metadata", {}).get("iso_language_code")) @@ -1789,7 +1796,7 @@ def search(self, query, type=SEARCH, start=1, count=10, sort=RELEVANCY, size=Non if rt: comment = re.search(r"^(.*? )RT", r.text) comment = comment.group(1) if comment else "" - r.text = self.format("RT @%s: %s" % (rt["user"]["screen_name"], rt["text"])) + r.text = self.format("RT @%s: %s" % (rt["user"]["screen_name"], f(rt))) results.append(r) # Twitter.search(start=id, count=10) takes a tweet.id, # and returns 10 results that are older than this id. @@ -1879,15 +1886,20 @@ def __init__(self, socket, delimiter="\n", format=lambda s: s, **kwargs): self.format = format def parse(self, data): + """ TwitterStream.queue will populate with Result objects as TwitterStream.update() is called iteratively. 
""" + def f(v): + v = v.get('extended_tweet', {}).get('full_text', v.get('full_text', v.get('text', ''))) + return v + if data.strip(): x = json.loads(data) r = Result(url=None) r.id = self.format(x.get("id_str")) r.url = self.format(TWITTER_STATUS % (x.get("user", {}).get("screen_name"), x.get("id_str"))) - r.text = self.format(x.get("text")) + r.text = self.format(f(x)) r.date = self.format(x.get("created_at")) r.author = self.format(x.get("user", {}).get("screen_name")) r.language = self.format(x.get("metadata", {}).get("iso_language_code")) @@ -1898,7 +1910,7 @@ def parse(self, data): if rt: comment = re.search(r"^(.*? )RT", r.text) comment = comment.group(1) if comment else "" - r.text = self.format("RT @%s: %s" % (rt["user"]["screen_name"], rt["text"])) + r.text = self.format("RT @%s: %s" % (rt["user"]["screen_name"], f(rt))) return r