From 4df5aadfba39e8945a6c0143dba33c5001d8aef0 Mon Sep 17 00:00:00 2001
From: arunkannawadi
Date: Sat, 17 Jun 2023 19:17:13 -0400
Subject: [PATCH 01/32] Update the proxy keys in _get_webdriver routines to
 reflect the changes that happened when we moved to httpx.

Fixes an issue reported in #498.
---
 scholarly/_proxy_generator.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/scholarly/_proxy_generator.py b/scholarly/_proxy_generator.py
index 49d5bd5..d296d90 100644
--- a/scholarly/_proxy_generator.py
+++ b/scholarly/_proxy_generator.py
@@ -365,8 +365,8 @@ def _get_webdriver(self):
 
     def _get_chrome_webdriver(self):
         if self._proxy_works:
             webdriver.DesiredCapabilities.CHROME['proxy'] = {
-                "httpProxy": self._proxies['http'],
-                "sslProxy": self._proxies['https'],
+                "httpProxy": self._proxies['http://'],
+                "sslProxy": self._proxies['https://'],
                 "proxyType": "MANUAL"
             }
 
@@ -381,8 +381,8 @@ def _get_firefox_webdriver(self):
         if self._proxy_works:
             # Redirect webdriver through proxy
             webdriver.DesiredCapabilities.FIREFOX['proxy'] = {
-                "httpProxy": self._proxies['http'],
-                "sslProxy": self._proxies['https'],
+                "httpProxy": self._proxies['http://'],
+                "sslProxy": self._proxies['https://'],
                 "proxyType": "MANUAL",
             }

From cd260d666dfa2589107709f2c2f06d98caa2957b Mon Sep 17 00:00:00 2001
From: arunkannawadi
Date: Sat, 17 Jun 2023 19:29:15 -0400
Subject: [PATCH 02/32] Stop prepending proxy with http if it is socks

Raised in #498.
---
 scholarly/_proxy_generator.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/scholarly/_proxy_generator.py b/scholarly/_proxy_generator.py
index d296d90..fb6ac4f 100644
--- a/scholarly/_proxy_generator.py
+++ b/scholarly/_proxy_generator.py
@@ -189,11 +189,11 @@ def _use_proxy(self, http: str, https: str = None) -> bool:
         :returns: whether or not the proxy was set up successfully
         :rtype: {bool}
         """
-        if http[:4] != "http":
+        if http[:4] not in ("http", "sock"):
             http = "http://" + http
         if https is None:
             https = http
-        elif https[:5] != "https":
+        elif https[:5] not in ("https", "socks"):
             https = "https://" + https
 
         proxies = {'http://': http, 'https://': https}

From d6d03f2e1e7431790fda309ab7038153766ee6a2 Mon Sep 17 00:00:00 2001
From: arunkannawadi
Date: Sat, 17 Jun 2023 22:55:15 -0400
Subject: [PATCH 03/32] Add a pub_date field to the bib dictionary
---
 scholarly/publication_parser.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/scholarly/publication_parser.py b/scholarly/publication_parser.py
index fa58cbf..46b52ee 100644
--- a/scholarly/publication_parser.py
+++ b/scholarly/publication_parser.py
@@ -313,6 +313,7 @@ def fill(self, publication: Publication)->Publication:
                                 'YYYY/M/D',
                                 'YYYY/MM/D']
                     publication['bib']['pub_year'] = arrow.get(val.text, patterns).year
+                    publication['bib']['pub_date'] = val.text
                 elif key == 'description':
                     # try to find all the gsh_csp if they exist
                     abstract = val.find_all(class_='gsh_csp')

From 9813d3934d243132af396debfad4247e20b9946a Mon Sep 17 00:00:00 2001
From: arunkannawadi
Date: Sat, 17 Jun 2023 23:50:41 -0400
Subject: [PATCH 04/32] Update tags to get public access of publications
---
 scholarly/author_parser.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/scholarly/author_parser.py b/scholarly/author_parser.py
index d46038e..9a9df53 100644
--- a/scholarly/author_parser.py
+++ b/scholarly/author_parser.py
@@ -152,14 +152,14 @@ def _fill_public_access(self, soup, author):
         while True:
             rows = soup.find_all('div', 'gsc_mnd_sec_na')
             if rows:
-                for row in rows[0].find_all('a', 'gsc_mnd_art_rvw gs_nph gsc_mnd_link_font'):
+                for row in rows[0].find_all('a', 'gsc_mnd_art_rvw gsc_mnd_link_font'):
                     author_pub_id = re.findall(r"citation_for_view=([\w:-]*)", row['data-href'])[0]
                     publications[author_pub_id]["public_access"] = False
 
             rows = soup.find_all('div', 'gsc_mnd_sec_avl')
             if rows:
-                for row in rows[0].find_all('a', 'gsc_mnd_art_rvw gs_nph gsc_mnd_link_font'):
+                for row in rows[0].find_all('a', 'gsc_mnd_art_rvw gsc_mnd_link_font'):
                     author_pub_id = re.findall(r"citation_for_view=([\w:-]*)", row['data-href'])[0]
                     publications[author_pub_id]["public_access"] = True

From 63cd33abe1d0459e91cfbf011b8f3346132cffe4 Mon Sep 17 00:00:00 2001
From: arunkannawadi
Date: Sat, 17 Jun 2023 23:51:17 -0400
Subject: [PATCH 05/32] Account for one version of mandate being cached in tests
---
 test_module.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/test_module.py b/test_module.py
index 0effc0f..3259ddc 100644
--- a/test_module.py
+++ b/test_module.py
@@ -212,15 +212,16 @@ def test_search_author_single_author(self):
                          sum(pub.get('public_access', None) is True for pub in author['publications']))
         self.assertEqual(author['public_access']['not_available'],
                          sum(pub.get('public_access', None) is False for pub in author['publications']))
-        pub = author['publications'][2]
+        pub = author['publications'][1]
         self.assertEqual(pub['author_pub_id'], u'4bahYMkAAAAJ:LI9QrySNdTsC')
         self.assertTrue('5738786554683183717' in pub['cites_id'])
         scholarly.fill(pub)
+        self.assertEqual(pub['pub_url'], "https://dl.acm.org/doi/abs/10.1145/3130800.3130815")
         mandate = Mandate(agency="US National Science Foundation", effective_date="2016/1", embargo="12 months",
                           url_policy="https://www.nsf.gov/pubs/2015/nsf15052/nsf15052.pdf",
                           url_policy_cached="/mandates/nsf-2021-02-13.pdf", grant="BCS-1354029")
-        self.assertIn(mandate, pub['mandates'])
+        self.assertIn(mandate['agency'], [_mandate['agency'] for _mandate in pub['mandates']])
         # Trigger the pprint method, but suppress the output
         with self.suppress_stdout():
             scholarly.pprint(author)

From ecbc2133d9e38e2e468525f243b31726506deb9a Mon Sep 17 00:00:00 2001
From: arunkannawadi
Date: Sat, 17 Jun 2023 23:51:37 -0400
Subject: [PATCH 06/32] Decrease the coauthor count to make the test pass

The decrease is real, verified by manual counting.
---
 test_module.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test_module.py b/test_module.py
index 3259ddc..2bfbe31 100644
--- a/test_module.py
+++ b/test_module.py
@@ -229,7 +229,7 @@ def test_search_author_single_author(self):
         # Check for the complete list of coauthors
         self.assertGreaterEqual(len(author['coauthors']), 20)
         if len(author['coauthors']) > 20:
-            self.assertGreaterEqual(len(author['coauthors']), 36)
+            self.assertGreaterEqual(len(author['coauthors']), 35)
         self.assertTrue('I23YUh8AAAAJ' in [_coauth['scholar_id'] for _coauth in author['coauthors']])

From 2d09680ab66b611796b6246c02b830c62a442e40 Mon Sep 17 00:00:00 2001
From: arunkannawadi
Date: Sun, 18 Jun 2023 01:06:52 -0400
Subject: [PATCH 07/32] Add url entry to bibtex

Addresses #499.
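For reviewers, a minimal sketch of the fallback order this change implements (the dictionary keys are the ones scholarly already uses; the sample values are made up):

    pub = {'bib': {'title': 'Example'}, 'pub_url': 'https://example.org/paper'}
    # prefer the direct eprint link; fall back to pub_url, then an empty string
    url = pub.get('eprint_url') or pub.get('pub_url', '')
    assert url == 'https://example.org/paper'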
---
 scholarly/publication_parser.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/scholarly/publication_parser.py b/scholarly/publication_parser.py
index 46b52ee..5d7bf37 100644
--- a/scholarly/publication_parser.py
+++ b/scholarly/publication_parser.py
@@ -402,6 +402,11 @@ def bibtex(self, publication: Publication) -> str:
             publication = self.fill(publication)
         a = BibDatabase()
         converted_dict = publication['bib']
+        try:
+            url = publication['eprint_url']
+        except KeyError:
+            url = publication.get('pub_url', '')
+        converted_dict['url'] = url
         converted_dict = remap_bib(converted_dict, _BIB_REVERSE_MAPPING)
         str_dict = {key: str(value) for key, value in converted_dict.items()}
         # convert every key of the dictionary to string to be Bibtex compatible

From fe98eb162d6eba0110974e989f115a51f493c5f4 Mon Sep 17 00:00:00 2001
From: arunkannawadi
Date: Sun, 18 Jun 2023 01:08:42 -0400
Subject: [PATCH 08/32] Fix bibtex unittest
---
 test_module.py                  | 25 +++++--------------------
 testdata/test_bibtex_result.txt | 11 +++++++++++
 2 files changed, 16 insertions(+), 20 deletions(-)
 create mode 100644 testdata/test_bibtex_result.txt

diff --git a/test_module.py b/test_module.py
index 2bfbe31..329eef6 100644
--- a/test_module.py
+++ b/test_module.py
@@ -685,33 +685,18 @@ def test_search_pubs_citedby_id(self):
         pubs = [p for p in scholarly.search_citedby(publication_id)]
         self.assertGreaterEqual(len(pubs), 11)
 
-    @unittest.skip(reason="The BiBTeX comparison is not reliable")
     def test_bibtex(self):
         """
         Test that we get the BiBTeX entry correctly
         """
-        expected_result = \
-            ("""@inproceedings{ester1996density,
-    abstract = {Clustering algorithms are attractive for the task of class identification in spatial databases. However, the application to large spatial databases rises the following requirements for clustering algorithms: minimal requirements of domain knowledge to determine the input},
-    author = {Ester, Martin and Kriegel, Hans-Peter and Sander, J{\"o}rg and Xu, Xiaowei and others},
-    booktitle = {kdd},
-    number = {34},
-    pages = {226--231},
-    pub_year = {1996},
-    title = {A density-based algorithm for discovering clusters in large spatial databases with noise.},
-    venue = {kdd},
-    volume = {96}
-}
-
-""")
-        pub = scholarly.search_single_pub("A density-based algorithm for discovering clusters in large "
-                                          "spatial databases with noise", filled=True)
+        with open("testdata/bibtex.txt", "r") as f:
+            expected_result = "".join(f.readlines())
+
+        pub = scholarly.search_single_pub("A distribution-based clustering algorithm for mining in large "
+                                          "spatial databases", filled=True)
         result = scholarly.bibtex(pub)
-        self.assertEqual(result, expected_result.replace("\n    ", "\n"))
+        self.assertEqual(result, expected_result)
 
     def test_search_pubs(self):
         """

diff --git a/testdata/test_bibtex_result.txt b/testdata/test_bibtex_result.txt
new file mode 100644
index 0000000..be925ef
--- /dev/null
+++ b/testdata/test_bibtex_result.txt
@@ -0,0 +1,11 @@
+@inproceedings{xu1998distribution,
+ abstract = {The problem of detecting clusters of points belonging to a spatial point process arises in many applications. In this paper, we introduce the new clustering algorithm DBCLASD (Distribution-Based Clustering of LArge Spatial Databases) to discover clusters of this type. The results of experiments demonstrate that DBCLASD, contrary to partitioning algorithms such as CLARANS (Clustering Large Applications based on RANdomized Search), discovers clusters of arbitrary shape. Furthermore, DBCLASD does not require any input},
+ author = {Xu, Xiaowei and Ester, Martin and Kriegel, H-P and Sander, J{\"o}rg},
+ booktitle = {Proceedings 14th International Conference on Data Engineering},
+ organization = {IEEE},
+ pages = {324--331},
+ pub_year = {1998},
+ title = {A distribution-based clustering algorithm for mining in large spatial databases},
+ url = {https://citeseerx.ist.psu.edu/document?repid=rep1&type=pdf&doi=933cba585a12e56a8f60511ebeb74b8cb42634b1},
+ venue = {… Conference on Data …}
+}

From 6caabb291214807935cce938f0ef4463a01d3c6c Mon Sep 17 00:00:00 2001
From: arunkannawadi
Date: Sat, 17 Jun 2023 18:35:54 -0400
Subject: [PATCH 09/32] Update CITATION version to 1.7.11

Fixes #501
---
 CITATION.cff | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/CITATION.cff b/CITATION.cff
index 01a09dc..3980c50 100644
--- a/CITATION.cff
+++ b/CITATION.cff
@@ -52,4 +52,4 @@ keywords:
   citation-index scholarly-articles citation-analysis scholar googlescholar
 license: Unlicense
-version: 1.5.0
+version: 1.7.11

From 91bada0e02c69710c25429811a7698d44eca72af Mon Sep 17 00:00:00 2001
From: Ji Ma <11808231+ma-ji@users.noreply.github.com>
Date: Tue, 11 Jul 2023 15:09:09 -0500
Subject: [PATCH 10/32] proxy format conflict

resolve conflict between proxy format: HTTPX and Requests
---
 scholarly/_proxy_generator.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/scholarly/_proxy_generator.py b/scholarly/_proxy_generator.py
index fb6ac4f..01672d1 100644
--- a/scholarly/_proxy_generator.py
+++ b/scholarly/_proxy_generator.py
@@ -136,7 +136,8 @@ def _check_proxy(self, proxies) -> bool:
         :rtype: {bool}
         """
         with requests.Session() as session:
-            session.proxies = proxies
+            # Reformat proxy for requests. Requests and HTTPX use different proxy format.
+            session.proxies = {'http':proxies['http://'], 'https':proxies['https://']}
             try:
                 resp = session.get("http://httpbin.org/ip", timeout=self._TIMEOUT)
                 if resp.status_code == 200:
@@ -189,6 +190,7 @@ def _use_proxy(self, http: str, https: str = None) -> bool:
         :returns: whether or not the proxy was set up successfully
         :rtype: {bool}
         """
+        # Reformat proxy for HTTPX
         if http[:4] not in ("http", "sock"):
             http = "http://" + http
         if https is None:
@@ -521,6 +523,7 @@ def _fp_coroutine(self, timeout=1, wait_time=120):
             proxies = {'http://': proxy, 'https://': proxy}
             proxy_works = self._check_proxy(proxies)
             if proxy_works:
+                print(proxies)
                 dirty_proxy = (yield proxy)
                 t1 = time.time()
             else:

From 03f063e8a9a97bb16739cfb23cb9ba5b25301d6b Mon Sep 17 00:00:00 2001
From: Ji Ma <11808231+ma-ji@users.noreply.github.com>
Date: Tue, 11 Jul 2023 15:10:41 -0500
Subject: [PATCH 11/32] del debug print
---
 scholarly/_proxy_generator.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/scholarly/_proxy_generator.py b/scholarly/_proxy_generator.py
index 01672d1..9967ed2 100644
--- a/scholarly/_proxy_generator.py
+++ b/scholarly/_proxy_generator.py
@@ -523,7 +523,6 @@ def _fp_coroutine(self, timeout=1, wait_time=120):
             proxies = {'http://': proxy, 'https://': proxy}
             proxy_works = self._check_proxy(proxies)
             if proxy_works:
-                print(proxies)
                 dirty_proxy = (yield proxy)
                 t1 = time.time()
             else:

From c7d4737d03cdb40bfa82d6a6f98a2a65efb6914a Mon Sep 17 00:00:00 2001
From: Melroy van den Berg
Date: Thu, 10 Aug 2023 23:49:50 +0200
Subject: [PATCH 12/32] Update requirements.txt

Package name changed from `fake_useragent` to `fake-useragent`.

Disclaimer: I'm the maintainer.
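Note for downstream users: only the PyPI distribution name changes; the import path keeps the underscore, so no code changes are needed. A quick check (a sketch, not part of the diff):

    # pip install fake-useragent
    from fake_useragent import UserAgent  # module name is still fake_useragent
    print(UserAgent().random)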
---
 requirements.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/requirements.txt b/requirements.txt
index 5b14200..a9b1021 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -2,7 +2,7 @@ arrow
 beautifulsoup4
 bibtexparser
 deprecated
-fake_useragent
+fake-useragent
 free-proxy
 httpx
 python-dotenv

From 6ddccd71a9043050351f8cbe5513a82f36fb10e2 Mon Sep 17 00:00:00 2001
From: "David V. Lu"
Date: Tue, 24 Oct 2023 15:12:46 -0400
Subject: [PATCH 13/32] Update citations by year data
---
 test_module.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test_module.py b/test_module.py
index 329eef6..3859df3 100644
--- a/test_module.py
+++ b/test_module.py
@@ -571,7 +571,7 @@ def test_cites_per_year(self):
         """
         author = scholarly.search_author_id('DW_bVcEAAAAJ')
         scholarly.fill(author, sections=['counts'])
-        cpy = {2014: 1, 2015: 2, 2016: 2, 2017: 0, 2018: 2, 2019: 1, 2020: 12, 2021: 21, 2022: 35}
+        cpy = {2014: 1, 2015: 2, 2016: 2, 2017: 0, 2018: 2, 2019: 0, 2020: 11, 2021: 21, 2022: 37, 2023: 27}
         for year, count in cpy.items():
             self.assertEqual(author['cites_per_year'][year], count)

From 9f194525fbfd4d3460137e4d522c8fe8e78cd982 Mon Sep 17 00:00:00 2001
From: "David V. Lu"
Date: Tue, 24 Oct 2023 15:19:32 -0400
Subject: [PATCH 14/32] Results from running codespell
---
 CHANGELOG.md                  |  6 +++---
 README.md                     |  2 +-
 scholarly/_proxy_generator.py | 10 +++++-----
 scholarly/_scholarly.py       |  2 +-
 scholarly/author_parser.py    |  4 ++--
 scholarly/data_types.py       |  4 ++--
 test_module.py                |  4 ++--
 7 files changed, 16 insertions(+), 16 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index b5932ad..1bfbb0c 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -8,7 +8,7 @@
 ### Bugfixes
 - Fix pprint failures on Windows #413.
 - Thoroughly handle 1000 or more publications that are available (or not) according to public access mandates #414.
-- Fix errors in `download_mandates_csv` that may occassionally occur for agencies without a policy link #413.
+- Fix errors in `download_mandates_csv` that may occasionally occur for agencies without a policy link #413.
 
 ## Changes in v1.6.3
 
@@ -35,7 +35,7 @@
 ### Features
 - Download table of funding agencies as a CSV file with URL to the funding mandates included
-- Downlad top-ranking journals in general, under sub-categories and in different languages as a CSV file
+- Download top-ranking journals in general, under sub-categories and in different languages as a CSV file
 
 ### Bugfixes
 - #392
 
@@ -58,7 +58,7 @@
 ## Changes in v1.5.0
 ### Features
 - Fetch the public access mandates information from a Scholar profile and mark the publications whether or not they satisfy the open-access mandate.
-- Fetch an author's organization identifer from their Scholar profile
+- Fetch an author's organization identifier from their Scholar profile
 - Search for all authors affiliated with an organization
 - Fetch homepage URL from a Scholar profile
 ### Enhancements

diff --git a/README.md b/README.md
index 88ddfb2..d1ac442 100644
--- a/README.md
+++ b/README.md
@@ -53,7 +53,7 @@ This means your code that uses an earlier version of `scholarly` is guaranteed t
 
 ## Tests
 
-To check if your installation is succesful, run the tests by executing the `test_module.py` file as:
+To check if your installation is successful, run the tests by executing the `test_module.py` file as:
 
 ```bash
 python3 test_module

diff --git a/scholarly/_proxy_generator.py b/scholarly/_proxy_generator.py
index 9967ed2..2d2ec6f 100644
--- a/scholarly/_proxy_generator.py
+++ b/scholarly/_proxy_generator.py
@@ -109,7 +109,7 @@ def SingleProxy(self, http=None, https=None):
         :param http: http proxy address
         :type http: string
-        :param https: https proxy adress
+        :param https: https proxy address
         :type https: string
         :returns: whether or not the proxy was set up successfully
         :rtype: {bool}
 
         :Example::
             >>> pg = ProxyGenerator()
             >>> success = pg.SingleProxy(http = , https = )
         """
         self.logger.info("Enabling proxies: http=%s https=%s", http, https)
         proxy_works = self._use_proxy(http=http, https=https)
@@ -162,7 +162,7 @@ def _refresh_tor_id(self, tor_control_port: int, password: str) -> bool:
         """Refreshes the id by using a new Tor node.
 
-        :returns: Whether or not the refresh was succesful
+        :returns: Whether or not the refresh was successful
         :rtype: {bool}
         """
         try:
@@ -434,7 +434,7 @@ def _handle_captcha2(self, url):
             self.logger.info("Google thinks we are DOSing the captcha.")
             raise e
         except (WebDriverException) as e:
-            self.logger.info("Browser seems to be disfunctional - closed by user?")
+            self.logger.info("Browser seems to be dysfunctional - closed by user?")
             raise e
         except Exception as e:
             # TODO: This exception handler should eventually be removed when
@@ -500,7 +500,7 @@ def _close_session(self):
             self.logger.warning("Could not close webdriver cleanly: %s", e)
 
     def _fp_coroutine(self, timeout=1, wait_time=120):
-        """A coroutine to continuosly yield free proxies
+        """A coroutine to continuously yield free proxies
 
         It takes back the proxies that stopped working and marks it as dirty.
         """

diff --git a/scholarly/_scholarly.py b/scholarly/_scholarly.py
index f0162dc..4f64f51 100644
--- a/scholarly/_scholarly.py
+++ b/scholarly/_scholarly.py
@@ -428,7 +428,7 @@ def search_pubs_custom_url(self, url: str)->_SearchScholarIterator:
         parameters in the Advanced Search dialog box and then use the URL here
         to programmatically fetch the results.
 
-        :param url: custom url to seach for the publication
+        :param url: custom url to search for the publication
         :type url: string
         """
         return self.__nav.search_publications(url)

diff --git a/scholarly/author_parser.py b/scholarly/author_parser.py
index 9a9df53..4516b80 100644
--- a/scholarly/author_parser.py
+++ b/scholarly/author_parser.py
@@ -222,7 +222,7 @@ def _get_coauthors_short(self, soup):
     def _get_coauthors_long(self, author):
         """Get the long (>20) list of coauthors.
 
-        This method fetches the complete list of coauthors bu opening a new
+        This method fetches the complete list of coauthors by opening a new
         page filled with the complete coauthor list.
         Note:
 
@@ -283,7 +283,7 @@ def fill(self, author, sections: list = [], sortby="citedby", publication_limit:
         :type sortby: string
         :param publication_limit: Select the max number of publications you want you want to fill for the author. Defaults to no limit.
         :type publication_limit: int
-        :returns: The filled object if fill was successfull, False otherwise.
+        :returns: The filled object if fill was successful, False otherwise.
         :rtype: Author or bool
 
         :Example::

diff --git a/scholarly/data_types.py b/scholarly/data_types.py
index dade6ba..13a9a38 100644
--- a/scholarly/data_types.py
+++ b/scholarly/data_types.py
@@ -49,7 +49,7 @@ class PublicationSource(str, Enum):
     We also have publications that appear in the "author pages" of Google Scholar.
     These publications are often a set of publications "merged" together.
 
-    The snippet version of these publications conains the title of the publication,
+    The snippet version of these publications contains the title of the publication,
     a subset of the authors, the (sometimes truncated) venue, and the year of the publication
     and the number of papers that cite the publication.
 
@@ -183,7 +183,7 @@ class Publication(TypedDict, total=False):
                        the "citedby_url" will be a comma-separated list of values.
                        It is also used to return the "cluster" of all the different versions of the paper.
                        https://scholar.google.com/scholar?cluster=16766804411681372720&hl=en
-    :param cites_per_year: a dictionay containing the number of citations per year for this Publication
+    :param cites_per_year: a dictionary containing the number of citations per year for this Publication
                            (source: AUTHOR_PUBLICATION_ENTRY)
     :param eprint_url: digital version of the Publication. Usually it is a pdf.
     :param pub_url: url of the website providing the publication

diff --git a/test_module.py b/test_module.py
index 329eef6..1f6b38c 100644
--- a/test_module.py
+++ b/test_module.py
@@ -244,7 +244,7 @@ def test_search_author_multiple_authors(self):
     def test_search_author_id(self):
         """
         Test the search by author ID. Marie Skłodowska-Curie's ID is
-        EmD_lTEAAAAJ and these IDs are permenant
+        EmD_lTEAAAAJ and these IDs are permanent
         """
         author = scholarly.search_author_id('EmD_lTEAAAAJ')
         self.assertEqual(author['name'], u'Marie Skłodowska-Curie')
@@ -254,7 +254,7 @@ def test_search_author_id_filled(self):
         """
         Test the search by author ID. Marie Skłodowska-Curie's ID is
-        EmD_lTEAAAAJ and these IDs are permenant.
+        EmD_lTEAAAAJ and these IDs are permanent.
         As of July 2020, Marie Skłodowska-Curie has 1963 citations
         on Google Scholar and 179 publications
         """

From 7a4da4b090600a112654aea9be07c7d83adb81ce Mon Sep 17 00:00:00 2001
From: Arun Kannawadi
Date: Sun, 5 Nov 2023 12:48:56 -0500
Subject: [PATCH 15/32] Remove 2023 values from test_cites_per_year
---
 test_module.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test_module.py b/test_module.py
index 3859df3..f08f6e3 100644
--- a/test_module.py
+++ b/test_module.py
@@ -571,7 +571,7 @@ def test_cites_per_year(self):
         """
         author = scholarly.search_author_id('DW_bVcEAAAAJ')
         scholarly.fill(author, sections=['counts'])
-        cpy = {2014: 1, 2015: 2, 2016: 2, 2017: 0, 2018: 2, 2019: 0, 2020: 11, 2021: 21, 2022: 37, 2023: 27}
+        cpy = {2014: 1, 2015: 2, 2016: 2, 2017: 0, 2018: 2, 2019: 0, 2020: 11, 2021: 21, 2022: 37}
         for year, count in cpy.items():
             self.assertEqual(author['cites_per_year'][year], count)

From 2af460eefdaa9d365e5e2b4fab11583fd1e6def9 Mon Sep 17 00:00:00 2001
From: Daniel Lebedinsky
Date: Sun, 19 Nov 2023 19:56:03 -0500
Subject: [PATCH 16/32] Fixed test_bibtex unit test, updated CONTRIBUTING.md
---
 .github/CONTRIBUTING.md | 13 +++++++------
 test_module.py          |  2 +-
 2 files changed, 8 insertions(+), 7 deletions(-)

diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md
index 9f80eb1..abace07 100644
--- a/.github/CONTRIBUTING.md
+++ b/.github/CONTRIBUTING.md
@@ -16,12 +16,13 @@ Additionally, if you are interesting in contributing to the codebase, submit a p
 
 ## How to contribute
 
-1. Create a fork of `scholarly-python-package/scholarly` repository.
-2. If you add a new feature, try to include tests in already existing test cases, or create a new test case if that is not possible.
-3. Make sure the unit tests pass before raising a PR. For all the unit tests to pass, you typically need to setup a premium proxy service such as `ScraperAPI` or `Luminati` (`Bright Data`). If you do not have an account, you may try to use `FreeProxy`. Without a proxy, 6 out of 17 test cases will be skipped.
-4. Check that the documentatation is consistent with the code. Check that the documentation builds successfully.
-5. Submit a PR, with `develop` as your base branch.
-6. After an initial code review by the maintainers, the unit tests will be run with the `ScraperAPI` key stored in the Github repository. Passing all tests cases is necessary before merging your PR.
+1. Create a fork of `scholarly-python-package/scholarly` repository. Make sure that "Copy the main branch only" is **not** checked off.
+2. After cloning your fork and checking out into the develop branch, run `python setup.py --help-commands` for more info on how to install dependencies and build. You may need to run it with `sudo`.
+3. If you add a new feature, try to include tests in already existing test cases, or create a new test case if that is not possible. For a comprehensive output, run `python -m unittest -v test_module.py`
+4. Make sure the unit tests pass before raising a PR. For all the unit tests to pass, you typically need to setup a premium proxy service such as `ScraperAPI` or `Luminati` (`Bright Data`). By default, `python setup.py install` will get `FreeProxy`. Without a proxy, 6 out of 17 test cases will be skipped.
+5. Check that the documentatation is consistent with the code. Check that the documentation builds successfully.
+6. Submit a PR, with `develop` as your base branch.
+7. After an initial code review by the maintainers, the unit tests will be run with the `ScraperAPI` key stored in the Github repository. Passing all tests cases is necessary before merging your PR.
 
 ## Build Docs

diff --git a/test_module.py b/test_module.py
index b2a3ac5..a0b5ac7 100644
--- a/test_module.py
+++ b/test_module.py
@@ -690,7 +690,7 @@ def test_bibtex(self):
         Test that we get the BiBTeX entry correctly
         """
 
-        with open("testdata/bibtex.txt", "r") as f:
+        with open("testdata/test_bibtex_result.txt", "r") as f:
             expected_result = "".join(f.readlines())
 
         pub = scholarly.search_single_pub("A distribution-based clustering algorithm for mining in large "

From ba3b8a4fb56d72fcf7ff89208021d679cae12b51 Mon Sep 17 00:00:00 2001
From: Daniel Lebedinsky
Date: Wed, 29 Nov 2023 00:11:55 -0500
Subject: [PATCH 17/32] Added test for FreeProxy
---
 test_module.py | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/test_module.py b/test_module.py
index a0b5ac7..43c8b5a 100644
--- a/test_module.py
+++ b/test_module.py
@@ -78,6 +78,20 @@ def test_tor_launch_own_process(self):
         authors = [a for a in scholarly.search_author(query)]
         self.assertGreaterEqual(len(authors), 1)
 
+class TestFreeProxy(unittest.TestCase):
+    luminati = os.getenv("USERNAME") and os.getenv("PASSWORD") and os.getenv("PORT")
+    scraperAPI = os.getenv('SCRAPER_API_KEY')
+    skipIf = (luminati is not None) or (scraperAPI is not None)
+
+    @unittest.skipIf(skipIf, reason="Other proxy is being used")
+    def test_freeproxy(self):
+        """
+        Test that we can set up FreeProxy successfully
+        """
+        proxy_generator = ProxyGenerator()
+        success = proxy_generator.FreeProxies()
+        self.assertTrue(success)
+        self.assertEqual(proxy_generator.proxy_mode, "FREE_PROXIES")
 
 class TestScholarly(unittest.TestCase):

From 3b5a2e8a9b73028631b21cf9d5c70d8ad16b4121 Mon Sep 17 00:00:00 2001
From: keko24
Date: Mon, 24 Jun 2024 11:16:35 +0200
Subject: [PATCH 18/32] Fixed an issue where search_pubs doesn't find a
 publication when only a single publication exists for the query.

Added a unit test for search_pubs that tests for the previous problem.
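A minimal reproduction of the reported symptom, assuming the query below still matches exactly one publication on Scholar:

    from scholarly import scholarly
    query = "Perception of physical stability and center of mass of 3D objects"
    pubs = list(scholarly.search_pubs(query))
    assert len(pubs) == 1  # previously [], because the single-result row was not parsed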
---
 scholarly/publication_parser.py |  2 +-
 test_module.py                  | 19 ++++++++++++++++++-
 2 files changed, 19 insertions(+), 2 deletions(-)

diff --git a/scholarly/publication_parser.py b/scholarly/publication_parser.py
index 5d7bf37..60d4769 100644
--- a/scholarly/publication_parser.py
+++ b/scholarly/publication_parser.py
@@ -58,7 +58,7 @@ def _load_url(self, url: str):
         # this is temporary until setup json file
         self._soup = self._nav._get_soup(url)
         self._pos = 0
-        self._rows = self._soup.find_all('div', class_='gs_r gs_or gs_scl') + self._soup.find_all('div', class_='gsc_mpat_ttl')
+        self._rows = self._soup.find_all('div', class_='gs_r gs_or gs_scl') + self._soup.find_all('div', class_='gs_r gs_or gs_scl gs_fmar') + self._soup.find_all('div', class_='gsc_mpat_ttl')
 
     def _get_total_results(self):
         if self._soup.find("div", class_="gs_pda"):

diff --git a/test_module.py b/test_module.py
index b2a3ac5..19c688c 100644
--- a/test_module.py
+++ b/test_module.py
@@ -653,7 +653,7 @@ def test_search_pubs_empty_publication(self):
         """
         Test that searching for an empty publication returns zero results
         """
-        pubs = [p for p in scholarly.search_pubs('')]
+        pubs = [p for p in scholarly.search_pubs('Perception of physical stability and center of mass of 3D objects')]
         self.assertIs(len(pubs), 0)
 
     def test_search_pubs_citedby(self):
@@ -718,6 +718,23 @@ def test_search_pubs(self):
         titles = [p['bib']['title'] for p in pubs]
         self.assertIn('Visual perception of the physical stability of asymmetric three-dimensional objects', titles)
 
+    def test_search_pubs_single_pub(self):
+        """
+        As of Jun 24, 2024 there are is only one pub that fits the search term:
+        [Perception of physical stability and center of mass of 3D objects].
+
+        Check that it returns a proper result and the total results for that search term is equal to 1.
+        """
+        pub = scholarly.search_single_pub("Perception of physical stability and center of mass of 3D objects")
+        pubs = list(scholarly.search_pubs("Perception of physical stability and center of mass of 3D objects"))
+        # Check that the first entry in pubs is the same as pub.
+        # Checking for quality holds for non-dict entries only.
+        for key in {'author_id', 'pub_url', 'num_citations'}:
+            self.assertEqual(pub[key], pubs[0][key])
+        for key in {'title', 'pub_year', 'venue'}:
+            self.assertEqual(pub['bib'][key], pubs[0]['bib'][key])
+        self.assertEqual(len(pubs), 1)
+
     def test_search_pubs_total_results(self):
         """
         As of September 16, 2021 there are 32 pubs that fit the search term:

From 0db2befd2a7f1f500a9433010f516d86ebd63e3c Mon Sep 17 00:00:00 2001
From: keko24
Date: Mon, 24 Jun 2024 11:42:31 +0200
Subject: [PATCH 19/32] Fixed total_results returning 0 when only a single
 publication exists.
---
 scholarly/publication_parser.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scholarly/publication_parser.py b/scholarly/publication_parser.py
index 60d4769..5d7e728 100644
--- a/scholarly/publication_parser.py
+++ b/scholarly/publication_parser.py
@@ -70,7 +70,7 @@ def _get_total_results(self):
             match = re.match(pattern=r'(^|\s*About)\s*([0-9,\.\s’]+)', string=x.text)
             if match:
                 return int(re.sub(pattern=r'[,\.\s’]',repl='', string=match.group(2)))
-        return 0
+        return len(self._rows)

From 2cd59b3b8a3e5c10f4bb2fee15f5ea9d5363534a Mon Sep 17 00:00:00 2001
From: Andrej <56741017+keko24@users.noreply.github.com>
Date: Tue, 25 Jun 2024 11:58:24 +0200
Subject: [PATCH 20/32] Removed the string in search_pubs in
 test_search_empty_publication.
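For context, the companion fix in patch 19 makes the iterator report a sensible total when Scholar omits the "About N results" banner; a sketch of the intended behaviour (the query is hypothetical):

    iterator = scholarly.search_pubs("some single-hit query")
    print(iterator.total_results)  # falls back to len(parsed rows), i.e. 1, instead of 0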
---
 test_module.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test_module.py b/test_module.py
index 19c688c..bcd93e4 100644
--- a/test_module.py
+++ b/test_module.py
@@ -653,7 +653,7 @@ def test_search_pubs_empty_publication(self):
         """
         Test that searching for an empty publication returns zero results
         """
-        pubs = [p for p in scholarly.search_pubs('Perception of physical stability and center of mass of 3D objects')]
+        pubs = [p for p in scholarly.search_pubs('')]
         self.assertIs(len(pubs), 0)

From 0765945fca6a933a508bd80bb87d746bcc620e61 Mon Sep 17 00:00:00 2001
From: Daniel Nisnevich
Date: Sun, 15 Sep 2024 09:28:18 +0300
Subject: [PATCH 21/32] Update publication_parser.py

changed line 61
---
 scholarly/publication_parser.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scholarly/publication_parser.py b/scholarly/publication_parser.py
index fa58cbf..8997d8f 100644
--- a/scholarly/publication_parser.py
+++ b/scholarly/publication_parser.py
@@ -58,7 +58,7 @@ def _load_url(self, url: str):
         # this is temporary until setup json file
         self._soup = self._nav._get_soup(url)
         self._pos = 0
-        self._rows = self._soup.find_all('div', class_='gs_r gs_or gs_scl') + self._soup.find_all('div', class_='gsc_mpat_ttl')
+        self._rows = self._soup.select("div.gs_r.gs_or.gs_scl") + self._soup.select("div.gsc_mpat_ttl")
 
     def _get_total_results(self):
         if self._soup.find("div", class_="gs_pda"):

From 0324b9179fc2002132e73bd8ddfa40dfe4d6ab80 Mon Sep 17 00:00:00 2001
From: Yaroslav Halchenko
Date: Mon, 28 Oct 2024 15:14:46 -0400
Subject: [PATCH 22/32] Add github action to codespell develop on push and PRs
---
 .github/workflows/codespell.yml | 25 +++++++++++++++++++++++++
 1 file changed, 25 insertions(+)
 create mode 100644 .github/workflows/codespell.yml

diff --git a/.github/workflows/codespell.yml b/.github/workflows/codespell.yml
new file mode 100644
index 0000000..748abfb
--- /dev/null
+++ b/.github/workflows/codespell.yml
@@ -0,0 +1,25 @@
+# Codespell configuration is within pyproject.toml
+---
+name: Codespell
+
+on:
+  push:
+    branches: [develop]
+  pull_request:
+    branches: [develop]
+
+permissions:
+  contents: read
+
+jobs:
+  codespell:
+    name: Check for spelling errors
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+      - name: Annotate locations with typos
+        uses: codespell-project/codespell-problem-matcher@v1
+      - name: Codespell
+        uses: codespell-project/actions-codespell@v2

From c8bf96439cad6c0d63f3ea5f6b90b24edb8e95f8 Mon Sep 17 00:00:00 2001
From: Yaroslav Halchenko
Date: Mon, 28 Oct 2024 15:14:46 -0400
Subject: [PATCH 23/32] Add rudimentary codespell config
---
 pyproject.toml | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/pyproject.toml b/pyproject.toml
index 9787c3b..a58d63b 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,3 +1,10 @@
 [build-system]
 requires = ["setuptools", "wheel"]
 build-backend = "setuptools.build_meta"
+
+[tool.codespell]
+# Ref: https://github.com/codespell-project/codespell#using-a-config-file
+skip = '.git*'
+check-hidden = true
+# ignore-regex = ''
+# ignore-words-list = ''

From 3e5ae3108d924141a8fec56a5df66ceccedeef03 Mon Sep 17 00:00:00 2001
From: Yaroslav Halchenko
Date: Mon, 28 Oct 2024 15:16:12 -0400
Subject: [PATCH 24/32] adjust skips
---
 pyproject.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index a58d63b..52ed30a 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -6,5 +6,5 @@ build-backend = "setuptools.build_meta"
 # Ref: https://github.com/codespell-project/codespell#using-a-config-file
 skip = '.git*'
 check-hidden = true
-# ignore-regex = ''
+ignore-regex = '\b(assertIn|Ewha Womans|citeseerx.ist.psu.edu\S*)\b'
 # ignore-words-list = ''

From 16b5f89426ccdcbf2924cba9b5838d8c3508826c Mon Sep 17 00:00:00 2001
From: Yaroslav Halchenko
Date: Mon, 28 Oct 2024 15:16:20 -0400
Subject: [PATCH 25/32] [DATALAD RUNCMD] run codespell throughout fixing few
 left typos automagically

=== Do not change lines below ===
{
 "chain": [],
 "cmd": "codespell -w",
 "exit": 0,
 "extra_inputs": [],
 "inputs": [],
 "outputs": [],
 "pwd": "."
}
^^^ Do not change lines above ^^^
---
 CODE_OF_CONDUCT.md      | 2 +-
 scholarly/data_types.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md
index f5b0e27..9d18efa 100644
--- a/CODE_OF_CONDUCT.md
+++ b/CODE_OF_CONDUCT.md
@@ -8,7 +8,7 @@ permalink: /coc.html
 We as members, contributors, and leaders pledge to make participation in our
 community a harassment-free experience for everyone, regardless of age, body
 size, visible or invisible disability, ethnicity, sex characteristics, gender
-identity and expression, level of experience, education, socio-economic status,
+identity and expression, level of experience, education, socioeconomic status,
 nationality, personal appearance, race, religion, or sexual identity
 and orientation.

diff --git a/scholarly/data_types.py b/scholarly/data_types.py
index 13a9a38..d57b1ed 100644
--- a/scholarly/data_types.py
+++ b/scholarly/data_types.py
@@ -20,7 +20,7 @@ class PublicationSource(str, Enum):
     "PUBLICATION SEARCH SNIPPET".
 
     This form captures the publication when it appears as a "snippet" in
-    the context of the resuls of a publication search. For example:
+    the context of the results of a publication search. For example:
 
     Publication search:
     https://scholar.google.com/scholar?hl=en&q=adaptive+fraud+detection&btnG=&as_sdt=0%2C33

From a4e6c8d00f877493498b914bcedd19200f087e95 Mon Sep 17 00:00:00 2001
From: nkxxll
Date: Sun, 29 Dec 2024 15:36:32 +0100
Subject: [PATCH 26/32] docs(quickstart): add conda to install option from
 github README
---
 docs/quickstart.rst | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/docs/quickstart.rst b/docs/quickstart.rst
index 73b5787..019a9ee 100644
--- a/docs/quickstart.rst
+++ b/docs/quickstart.rst
@@ -16,6 +16,12 @@ or use ``pip`` to install from github:
 
     pip install git+https://github.com/scholarly-python-package/scholarly.git
 
+or use ``conda`` to install from ``conda-forge``:
+
+.. code:: bash
+
+    conda install -c conda-forge scholarly
+
 or clone the package using git:

From 1b065eed19eba793daf638211e28f339397d31c9 Mon Sep 17 00:00:00 2001
From: brokenjade3000
Date: Sat, 8 Feb 2025 14:27:55 -0700
Subject: [PATCH 27/32] The current httpx doesn't support the `proxies`
 argument: https://github.com/encode/httpx/blob/master/httpx/_client.py#L239
---
 scholarly/_proxy_generator.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/scholarly/_proxy_generator.py b/scholarly/_proxy_generator.py
index 2d2ec6f..3b37883 100644
--- a/scholarly/_proxy_generator.py
+++ b/scholarly/_proxy_generator.py
@@ -485,6 +485,10 @@ def _new_session(self, **kwargs):
             # ScraperAPI requests to work.
            # https://www.scraperapi.com/documentation/
             init_kwargs["verify"] = False
+        if 'proxies' in init_kwargs:
+            proxy=init_kwargs['proxies']['https://']
+            del init_kwargs['proxies']
+            init_kwargs['proxy'] = proxy
         self._session = httpx.Client(**init_kwargs)
         self._webdriver = None

From 67dab6fabd35015c3206949baa388012e4c0883f Mon Sep 17 00:00:00 2001
From: Tobias Zimmermann <77075037+tZimmermann98@users.noreply.github.com>
Date: Mon, 3 Feb 2025 11:40:51 +0100
Subject: [PATCH 28/32] Update publication_parser.py for arrow errors,
 fallback to regex year extraction
---
 scholarly/publication_parser.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/scholarly/publication_parser.py b/scholarly/publication_parser.py
index df54306..b023510 100644
--- a/scholarly/publication_parser.py
+++ b/scholarly/publication_parser.py
@@ -312,7 +312,11 @@ def fill(self, publication: Publication)->Publication:
                                 'YYYY/M/DD',
                                 'YYYY/M/D',
                                 'YYYY/MM/D']
-                    publication['bib']['pub_year'] = arrow.get(val.text, patterns).year
+                    try:
+                        publication['bib']['pub_year'] = arrow.get(val.text, patterns).year
+                    except ValueError:
+                        # fallback to regex year extraction if arrow fails
+                        publication['bib']['pub_year'] = re.search(r'\d{4}', val.text).group()
                     publication['bib']['pub_date'] = val.text
                 elif key == 'description':
                     # try to find all the gsh_csp if they exist

From db060439daab09a35bef78437594ebdf3fe7188d Mon Sep 17 00:00:00 2001
From: Tobias Zimmermann <77075037+tZimmermann98@users.noreply.github.com>
Date: Mon, 3 Feb 2025 11:52:05 +0100
Subject: [PATCH 29/32] fallback to regex year extraction or empty String when
 arrow fails
---
 scholarly/publication_parser.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/scholarly/publication_parser.py b/scholarly/publication_parser.py
index b023510..ca3ca16 100644
--- a/scholarly/publication_parser.py
+++ b/scholarly/publication_parser.py
@@ -316,7 +316,8 @@ def fill(self, publication: Publication)->Publication:
                         publication['bib']['pub_year'] = arrow.get(val.text, patterns).year
                     except ValueError:
                         # fallback to regex year extraction if arrow fails
-                        publication['bib']['pub_year'] = re.search(r'\d{4}', val.text).group()
+                        match = re.search(r'\d{4}', val.text)
+                        publication['bib']['pub_year'] = match.group() if match else ""
                     publication['bib']['pub_date'] = val.text
                 elif key == 'description':

From eecaae552bee6f031f9416604f5d1234550c240f Mon Sep 17 00:00:00 2001
From: L
Date: Fri, 12 Apr 2024 09:46:49 +1000
Subject: [PATCH 30/32] Add in PDF link in publication fill
---
 scholarly/publication_parser.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/scholarly/publication_parser.py b/scholarly/publication_parser.py
index ca3ca16..3040f92 100644
--- a/scholarly/publication_parser.py
+++ b/scholarly/publication_parser.py
@@ -286,6 +286,10 @@ def fill(self, publication: Publication)->Publication:
         if soup.find('a', class_='gsc_oci_title_link'):
             publication['pub_url'] = soup.find(
                 'a', class_='gsc_oci_title_link')['href']
+        if soup.find('div', class_='gsc_oci_title_ggi'):
+            link = soup.find('a', attrs={'data-clk': True})
+            if link:
+                publication['pdf_url'] = link['href']
         for item in soup.find_all('div', class_='gs_scl'):
             key = item.find(class_='gsc_oci_field').text.strip().lower()
             val = item.find(class_='gsc_oci_value')

From 63f35925081be9040ead4dc8decc50e30f750cc8 Mon Sep 17 00:00:00 2001
From: L
Date: Fri, 12 Apr 2024 10:11:32 +1000
Subject: [PATCH 31/32] Update tests and add in pdf url from search results
---
 scholarly/publication_parser.py | 4 ++++
 test_module.py                  | 6 ++++--
 2 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/scholarly/publication_parser.py b/scholarly/publication_parser.py
index 3040f92..cb205d5 100644
--- a/scholarly/publication_parser.py
+++ b/scholarly/publication_parser.py
@@ -202,6 +202,10 @@ def _scholar_pub(self, __data, publication: Publication):
         if title.find('a'):
             publication['pub_url'] = title.find('a')['href']
 
+        pdf_div = __data.find('div', class_='gs_ggs gs_fl')
+        if pdf_div and pdf_div.find('a', href=True):
+            publication['pdf_url'] = pdf_div.find('a')['href']
+
         author_div_element = databox.find('div', class_='gs_a')
         authorinfo = author_div_element.text
         authorinfo = authorinfo.replace(u'\xa0', u' ')  # NBSP

diff --git a/test_module.py b/test_module.py
index e54e1aa..153c790 100644
--- a/test_module.py
+++ b/test_module.py
@@ -724,7 +724,7 @@ def test_search_pubs(self):
         pubs = list(scholarly.search_pubs('"naive physics" stability "3d shape"'))
         # Check that the first entry in pubs is the same as pub.
         # Checking for quality holds for non-dict entries only.
-        for key in {'author_id', 'pub_url', 'num_citations'}:
+        for key in {'author_id', 'pub_url', 'pdf_url', 'num_citations'}:
             self.assertEqual(pub[key], pubs[0][key])
         for key in {'title', 'pub_year', 'venue'}:
             self.assertEqual(pub['bib'][key], pubs[0]['bib'][key])
@@ -784,6 +784,7 @@ def test_search_pubs_filling_publication_contents(self):
         self.assertTrue(f['bib']['publisher'] == u'The Association for Research in Vision and Ophthalmology')
         self.assertTrue(f['bib']['title'] == u'Creating correct blur and its effect on accommodation')
         self.assertTrue(f['pub_url'] == u'https://jov.arvojournals.org/article.aspx?articleid=2701817')
+        self.assertTrue(f['pdf_url'] == u'https://jov.arvojournals.org/arvo/content_public/journal/jov/937491/i1534-7362-18-9-1.pdf')
         self.assertTrue(f['bib']['volume'] == '18')
         self.assertTrue(f['bib']['pub_year'] == u'2018')
 
@@ -800,6 +801,7 @@ def test_related_articles_from_author(self):
         # Typically, the same publication is returned as the most related article
         same_article = next(related_articles)
         self.assertEqual(pub["pub_url"], same_article["pub_url"])
+        self.assertEqual(pub["pdf_url"], same_article["pdf_url"])
         for key in {'title', 'pub_year'}:
             self.assertEqual(str(pub['bib'][key]), (same_article['bib'][key]))
 
@@ -818,7 +820,7 @@ def test_related_articles_from_publication(self):
         related_articles = scholarly.get_related_articles(pub)
         # Typically, the same publication is returned as the most related article
         same_article = next(related_articles)
-        for key in {'author_id', 'pub_url', 'num_citations'}:
+        for key in {'author_id', 'pub_url', 'pdf_url', 'num_citations'}:
             self.assertEqual(pub[key], same_article[key])
         for key in {'title', 'pub_year'}:
             self.assertEqual(pub['bib'][key], same_article['bib'][key])

From 35f97d7bae60be438b125f7c3008ded2d0e929c1 Mon Sep 17 00:00:00 2001
From: L
Date: Thu, 12 Dec 2024 12:30:11 +1000
Subject: [PATCH 32/32] Renamed "pdf_url" to "eprint_url"
---
 scholarly/publication_parser.py | 4 ++--
 test_module.py                  | 8 ++++----
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/scholarly/publication_parser.py b/scholarly/publication_parser.py
index cb205d5..e03113b 100644
--- a/scholarly/publication_parser.py
+++ b/scholarly/publication_parser.py
@@ -204,7 +204,7 @@ def _scholar_pub(self, __data, publication: Publication):
         pdf_div = __data.find('div', class_='gs_ggs gs_fl')
         if pdf_div and pdf_div.find('a', href=True):
-            publication['pdf_url'] = pdf_div.find('a')['href']
+            publication['eprint_url'] = pdf_div.find('a')['href']
 
         author_div_element = databox.find('div', class_='gs_a')
         authorinfo = author_div_element.text
@@ -293,7 +293,7 @@ def fill(self, publication: Publication)->Publication:
         if soup.find('div', class_='gsc_oci_title_ggi'):
             link = soup.find('a', attrs={'data-clk': True})
             if link:
-                publication['pdf_url'] = link['href']
+                publication['eprint_url'] = link['href']
         for item in soup.find_all('div', class_='gs_scl'):
             key = item.find(class_='gsc_oci_field').text.strip().lower()
             val = item.find(class_='gsc_oci_value')

diff --git a/test_module.py b/test_module.py
index 153c790..bf464e3 100644
--- a/test_module.py
+++ b/test_module.py
@@ -724,7 +724,7 @@ def test_search_pubs(self):
         pubs = list(scholarly.search_pubs('"naive physics" stability "3d shape"'))
         # Check that the first entry in pubs is the same as pub.
         # Checking for quality holds for non-dict entries only.
-        for key in {'author_id', 'pub_url', 'pdf_url', 'num_citations'}:
+        for key in {'author_id', 'pub_url', 'eprint_url', 'num_citations'}:
             self.assertEqual(pub[key], pubs[0][key])
         for key in {'title', 'pub_year', 'venue'}:
             self.assertEqual(pub['bib'][key], pubs[0]['bib'][key])
@@ -784,7 +784,7 @@ def test_search_pubs_filling_publication_contents(self):
         self.assertTrue(f['bib']['publisher'] == u'The Association for Research in Vision and Ophthalmology')
         self.assertTrue(f['bib']['title'] == u'Creating correct blur and its effect on accommodation')
         self.assertTrue(f['pub_url'] == u'https://jov.arvojournals.org/article.aspx?articleid=2701817')
-        self.assertTrue(f['pdf_url'] == u'https://jov.arvojournals.org/arvo/content_public/journal/jov/937491/i1534-7362-18-9-1.pdf')
+        self.assertTrue(f['eprint_url'] == u'https://jov.arvojournals.org/arvo/content_public/journal/jov/937491/i1534-7362-18-9-1.pdf')
         self.assertTrue(f['bib']['volume'] == '18')
         self.assertTrue(f['bib']['pub_year'] == u'2018')
 
@@ -801,7 +801,7 @@ def test_related_articles_from_author(self):
         # Typically, the same publication is returned as the most related article
         same_article = next(related_articles)
         self.assertEqual(pub["pub_url"], same_article["pub_url"])
-        self.assertEqual(pub["pdf_url"], same_article["pdf_url"])
+        self.assertEqual(pub["eprint_url"], same_article["eprint_url"])
         for key in {'title', 'pub_year'}:
             self.assertEqual(str(pub['bib'][key]), (same_article['bib'][key]))
 
@@ -820,7 +820,7 @@ def test_related_articles_from_publication(self):
         related_articles = scholarly.get_related_articles(pub)
         # Typically, the same publication is returned as the most related article
         same_article = next(related_articles)
-        for key in {'author_id', 'pub_url', 'pdf_url', 'num_citations'}:
+        for key in {'author_id', 'pub_url', 'eprint_url', 'num_citations'}:
             self.assertEqual(pub[key], same_article[key])
         for key in {'title', 'pub_year'}:
             self.assertEqual(pub['bib'][key], same_article['bib'][key])
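With the rename in place, callers read the direct PDF link from the pre-existing eprint_url key; a minimal usage sketch (query borrowed from the tests above):

    from scholarly import scholarly
    pub = next(scholarly.search_pubs('"naive physics" stability "3d shape"'))
    pdf_link = pub.get('eprint_url')  # was pub['pdf_url'] earlier in this series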