Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 10 additions & 6 deletions wp1/logic/page.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import requests

import wp1.logic.util as logic_util
from wp1.constants import GLOBAL_TIMESTAMP, TS_FORMAT
from wp1.constants import GLOBAL_TIMESTAMP, TS_FORMAT, TS_FORMAT_WP10
from wp1.logic import log as logic_log
from wp1.logic import move as logic_move
from wp1.logic.api import page as api_page
Expand Down Expand Up @@ -87,25 +87,29 @@ def update_page_moved(
def _get_redirects_from_db(wikidb, namespace, title, timestamp_dt):
    """Look up a redirect for a page that was touched after ``timestamp_dt``.

    Args:
        wikidb: Database connection; must provide ``ping()`` and ``cursor()``.
            Rows are read by column name, so the cursor is expected to
            yield dict-like rows.
        namespace: Namespace number matched against ``page_namespace``.
        title: Page title as UTF-8 encoded bytes. Spaces are replaced with
            underscores before the lookup.
        timestamp_dt: ``datetime`` cutoff. Only a page whose
            ``page_touched`` is strictly greater than this value (formatted
            with ``TS_FORMAT_WP10``) is returned.

    Returns:
        A dict with ``dest_ns``, ``dest_title`` and ``timestamp_dt`` (the
        parsed ``page_touched`` value) for the first matching row, or
        ``None`` when no row matches.
    """
    wiki_db_title = title.decode("utf-8").replace(" ", "_")
    # NOTE(review): presumably re-establishes a dropped connection before
    # querying -- confirm against the connection class in use.
    wikidb.ping()
    args_dict = {
        "title": wiki_db_title,
        "namespace": namespace,
        # The cutoff is sent as a string in the same format that is used
        # below to parse page_touched.
        "timestamp": timestamp_dt.strftime(TS_FORMAT_WP10),
    }
    with wikidb.cursor() as cursor:
        cursor.execute(
            """
SELECT rd_namespace, rd_title, page_touched FROM page
JOIN redirect ON page_id = rd_from AND
page_title = %(title)s AND page_namespace = %(namespace)s
WHERE page_touched > %(timestamp)s
""",
            args_dict,
        )
        row = cursor.fetchone()
        if not row:
            return None
        return {
            "dest_ns": row["rd_namespace"],
            "dest_title": row["rd_title"],
            # page_touched arrives as bytes; decode before parsing.
            "timestamp_dt": datetime.strptime(
                row["page_touched"].decode("utf-8"), TS_FORMAT_WP10
            ),
        }

Expand Down
49 changes: 49 additions & 0 deletions wp1/logic/page_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -235,6 +235,55 @@ def test_get_single_move_too_old_from_api(self, patched_site):
)
self.assertIsNone(move_data)

def test_get_redirect_from_db_stale(self):
    """A redirect whose page was touched before the cutoff is ignored."""
    # Seed a page last touched in 2016 plus the redirect row pointing
    # away from it.
    seed_statements = (
        """
INSERT INTO page (page_id, page_namespace, page_title, page_touched)
VALUES (200, 0, 'Some_Moved_Article', '20160315142300')
""",
        """
INSERT INTO redirect (rd_from, rd_namespace, rd_title)
VALUES (200, 0, 'Destination_Article')
""",
    )
    with self.wikidb.cursor() as cursor:
        for statement in seed_statements:
            cursor.execute(statement)
    self.wikidb.commit()

    # The cutoff (2022) is later than the 2016 touch time, so the lookup
    # must come back empty.
    cutoff = datetime(2022, 1, 1)
    result = logic_page._get_redirects_from_db(
        self.wikidb, 0, b"Some Moved Article", cutoff
    )
    self.assertIsNone(result)

def test_get_redirect_from_db_fresh(self):
    """A redirect whose page was touched after the cutoff is returned."""
    # Seed a page last touched in 2023, which is newer than the cutoff
    # used below, plus the redirect row pointing away from it.
    seed_statements = (
        """
INSERT INTO page (page_id, page_namespace, page_title, page_touched)
VALUES (201, 0, 'Some_Moved_Article', '20230315142300')
""",
        """
INSERT INTO redirect (rd_from, rd_namespace, rd_title)
VALUES (201, 0, 'Destination_Article')
""",
    )
    with self.wikidb.cursor() as cursor:
        for statement in seed_statements:
            cursor.execute(statement)
    self.wikidb.commit()

    # The cutoff (2022) is earlier than the 2023 touch time, so the
    # redirect must be reported.
    cutoff = datetime(2022, 1, 1)
    result = logic_page._get_redirects_from_db(
        self.wikidb, 0, b"Some Moved Article", cutoff
    )
    self.assertIsNotNone(result)

    expected_fields = (
        ("dest_ns", 0),
        ("dest_title", b"Destination_Article"),
        ("timestamp_dt", datetime(2023, 3, 15, 14, 23, 0)),
    )
    for field, expected in expected_fields:
        self.assertEqual(expected, result[field])


class LogicPageMoveDbTest(BaseWpOneDbTest):

Expand Down
Loading