diff --git a/.travis.yml b/.travis.yml index 85e0d6b..d52ecd6 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,11 +1,13 @@ language: python -python: - - "2.6" - - "2.7" - - "3.3" - - "3.4" - - "3.5" - - "3.6" +matrix: + include: + - python: 2.7 + - python: 3.4 + - python: 3.5 + - python: 3.6 + - python: 3.7 + dist: xenial + sudo: true env: CFLAGS="-O0" cache: diff --git a/appveyor.yml b/appveyor.yml index df3faf9..095774d 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -4,9 +4,9 @@ environment: - PYTHON: "C:\\Python27" - PYTHON: "C:\\Python33" - PYTHON: "C:\\Python34" - # Appveyor does not currently find the wheels for lxml, and cannot build lxml from source. Disable these for now. - # - PYTHON: "C:\\Python35" - # - PYTHON: "C:\\Python36" + - PYTHON: "C:\\Python35" + - PYTHON: "C:\\Python36" + - PYTHON: "C:\\Python37" build: off diff --git a/pdfquery/pdfquery.py b/pdfquery/pdfquery.py index aa7f584..3708a96 100644 --- a/pdfquery/pdfquery.py +++ b/pdfquery/pdfquery.py @@ -7,6 +7,8 @@ import numbers import re import chardet +import sys +import hashlib try: from collections import OrderedDict except ImportError: @@ -84,7 +86,11 @@ def _comp_bbox(el, el2): # assorted helpers -def _flatten(l, ltypes=(list, tuple)): +LTYPES = (list, tuple) +if sys.version_info.major > 2: + LTYPES = (list, tuple, range) + +def _flatten(l, ltypes=LTYPES): # via http://rightfootin.blogspot.com/2006/09/more-on-python-flatten.html ltype = type(l) l = list(l) @@ -458,7 +464,9 @@ def get_tree(self, *page_numbers): Return lxml.etree.ElementTree for entire document, or page numbers given if any. """ - cache_key = "_".join(map(str, _flatten(page_numbers))) + hasher = hashlib.md5() + hasher.update(str(page_numbers).encode('UTF-8')) + cache_key = "_{}".format(hasher.hexdigest()) tree = self._parse_tree_cacher.get(cache_key) if tree is None: # set up root diff --git a/tests/samples/bug67.pdf b/tests/samples/bug67.pdf new file mode 100644 index 0000000..42e0717 Binary files /dev/null and b/tests/samples/bug67.pdf differ diff --git a/tests/tests.py b/tests/tests.py index 044c3e7..1c4c5bb 100644 --- a/tests/tests.py +++ b/tests/tests.py @@ -8,6 +8,7 @@ import sys import pdfquery from pdfquery.cache import FileCache +import tempfile from .utils import BaseTestCase @@ -157,3 +158,33 @@ def test_annot_dereferencing(self): pdf.load() pdf = pdfquery.PDFQuery("tests/samples/bug42.pdf") pdf.load() + +class TestPageRange(BaseTestCase): + """ + Test various page number parameters + """ + + @classmethod + def setUpClass(cls): + cache_dir = "{}/".format(tempfile.gettempdir()) + cls.pdf = pdfquery.PDFQuery("tests/samples/bug67.pdf", parse_tree_cacher=FileCache(cache_dir)) + + def test_page_int(self): + self.pdf.load(3) + self.assertEqual(len(self.pdf.pq('LTPage')), 1) + self.pdf.load(0, 10, 25, 49) + self.assertEqual(len(self.pdf.pq('LTPage')), 4) + + def test_page_array(self): + self.pdf.load([0, 7, 11]) + self.assertEqual(len(self.pdf.pq('LTPage')), 3) + self.pdf.load([10], [0, 12], [30, 40]) + self.assertEqual(len(self.pdf.pq('LTPage')), 5) + + def test_page_mixed(self): + self.pdf.load([0, 7, 11], [0, 44], 1) + self.assertEqual(len(self.pdf.pq('LTPage')), 6) + + def test_page_range(self): + self.pdf.load(range(0, 150)) + self.assertEqual(len(self.pdf.pq('LTPage')), 150)