From 02d3117ebfe739b44e084c9d217f9eb0d2c6220f Mon Sep 17 00:00:00 2001 From: Sam Walton Date: Wed, 14 Aug 2019 15:27:05 +0200 Subject: [PATCH 1/3] Don't make edits already made (avoid edit wars) --- db.py | 27 +++++++++++++++++++++++++++ retractionbot.py | 36 ++++++++++++++++++++++-------------- 2 files changed, 49 insertions(+), 14 deletions(-) diff --git a/db.py b/db.py index c5272ed..82f0d5c 100644 --- a/db.py +++ b/db.py @@ -87,3 +87,30 @@ def log_retraction_edit(timestamp, domain, page_title, old_id, new_id): old_id=old_id, new_id=new_id )) + + +def already_retracted(old_id, new_id, domain, page_title): + """ + Given old and new IDs, a domain, and a page title, check if this edit has + already been made in the past so we can avoid edit warring. + """ + cur = db.cursor() + query = """ + SELECT COUNT(*) FROM edit_log + WHERE original_id = {old_id} + AND retraction_id = new_id + AND domain = '{domain}' + AND page_title = '{page_title}' + """ + cur.execute(query.format( + old_id=old_id, + new_id=new_id, + domain=domain, + page_title=page_title + )) + count_result = cur.fetchone() + + if count_result[0] != 0: + return True + else: + return False diff --git a/retractionbot.py b/retractionbot.py index 03f979d..5a1db8a 100644 --- a/retractionbot.py +++ b/retractionbot.py @@ -7,7 +7,9 @@ import re import yaml -from db import load_retracted_identifiers, log_retraction_edit +from db import (load_retracted_identifiers, + log_retraction_edit, + already_retracted) directory = os.path.dirname(os.path.realpath(__file__)) @@ -119,19 +121,25 @@ def run_bot(): # Only bother trying to make an edit if we changed anything if page_text != wp_page.text: - wp_page.text = page_text - edit_summary = "Flagging a cited source as retracted" - - #wp_page.save(edit_summary, minor=False) - logger.info("Successfully edited {page_name} with " - "retracted source(s).".format( - page_name=wp_page.title() - )) - log_retraction_edit(datetime.datetime.now(), - language + ".wikipedia.org", - wp_page, - original_id, - retraction_id) + domain = language + ".wikipedia.org" + # Make sure we're not edit warring + if not already_retracted(original_id, + retraction_id, + domain, + wp_page): + wp_page.text = page_text + edit_summary = "Flagging a cited source as retracted" + + #wp_page.save(edit_summary, minor=False) + logger.info("Successfully edited {page_name} with " + "retracted source(s).".format( + page_name=wp_page.title() + )) + log_retraction_edit(datetime.datetime.now(), + domain, + wp_page, + original_id, + retraction_id) if __name__ == '__main__': From b64946432c7cfb0b82258d926aa7f3019b3ad874 Mon Sep 17 00:00:00 2001 From: Sam Walton Date: Wed, 14 Aug 2019 15:34:08 +0200 Subject: [PATCH 2/3] Fix db query --- db.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/db.py b/db.py index 82f0d5c..1e4c2f7 100644 --- a/db.py +++ b/db.py @@ -98,9 +98,9 @@ def already_retracted(old_id, new_id, domain, page_title): query = """ SELECT COUNT(*) FROM edit_log WHERE original_id = {old_id} - AND retraction_id = new_id - AND domain = '{domain}' - AND page_title = '{page_title}' + AND retraction_id = {new_id} + AND domain = "{domain}" + AND page_title = "{page_title}" """ cur.execute(query.format( old_id=old_id, From 37825465d3c9db65ad6ac61eb1f01e6984340da4 Mon Sep 17 00:00:00 2001 From: Sam Walton Date: Wed, 14 Aug 2019 15:36:27 +0200 Subject: [PATCH 3/3] Further fix db query --- db.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/db.py b/db.py index 1e4c2f7..5c794df 100644 --- a/db.py +++ b/db.py @@ -97,8 +97,8 @@ def already_retracted(old_id, new_id, domain, page_title): cur = db.cursor() query = """ SELECT COUNT(*) FROM edit_log - WHERE original_id = {old_id} - AND retraction_id = {new_id} + WHERE original_id = "{old_id}" + AND retraction_id = "{new_id}" AND domain = "{domain}" AND page_title = "{page_title}" """