30
30
31
31
from typing import TYPE_CHECKING , List , Optional , Tuple , Union , cast
32
32
33
- from . import ArrayObject , DictionaryObject , IndirectObject , PdfObject , TextStringObject
33
+ from . import ArrayObject , DictionaryObject , IndirectObject , NullObject , PdfObject , TextStringObject
34
34
35
35
if TYPE_CHECKING :
36
36
from .._page import PageObject
37
+ from .._protocols import PdfCommonDocProtocol
37
38
from .._reader import PdfReader
38
39
from .._writer import PdfWriter
40
+ from ..generic import Destination
39
41
40
42
41
43
class NamedReferenceLink :
42
44
"""Named reference link being preserved until we can resolve it correctly."""
43
45
44
def __init__(self, reference: TextStringObject, page: "PageObject") -> None:
    """
    Store the named reference and resolve the page it points to.

    Args:
        reference: TextStringObject with named reference
        page: the page that contains the link. Its ``pdf`` is searched
            first, then the ``pdf`` of each page in ``page._merged_in_pages``.

    After construction ``self._referenced_page`` holds the first entry of
    the matching destination's ``dest_array``, or ``None`` when no
    destination was found or that entry is a ``NullObject``.
    """
    self._reference = reference

    # To work out where the reference points we need to find the source
    # PDF which the reference is pointing to. This *can* be the PDF the
    # page containing the link comes from, but it may also be some other
    # PDF merged into this page, so we need to do a little search.
    destination = self._find_page_in(page.pdf)

    if not destination:
        for src_page in page._merged_in_pages:
            destination = self._find_page_in(src_page.pdf)
            # Only stop once a destination was actually found; breaking
            # unconditionally would inspect just the first merged-in page.
            if destination:
                break

    if destination and not isinstance(destination.dest_array[0], NullObject):
        self._referenced_page = destination.dest_array[0]
    else:
        self._referenced_page = None
66
+
67
def _find_page_in(self, pdf: "Optional[PdfCommonDocProtocol]") -> "Optional[Destination]":
    """
    Look up this link's named reference in the named destinations of *pdf*.

    Returns ``None`` when *pdf* is falsy or has no ``named_destinations``
    attribute; otherwise returns whatever ``named_destinations.get`` yields
    for ``str(self._reference)``.
    """
    if pdf and hasattr(pdf, "named_destinations"):
        # The cast only informs the type checker; the hasattr() test above
        # is the actual runtime guard.
        known_destinations = cast("PdfReader", pdf).named_destinations
        return known_destinations.get(str(self._reference))
    return None
48
72
49
73
def find_referenced_page(self) -> Union[IndirectObject, None]:
    """
    Return the indirect reference of the page stored in ``_referenced_page``.

    Returns ``None`` when ``_referenced_page`` is falsy (for example when
    no page has been stored).
    """
    target = self._referenced_page
    return target.indirect_reference if target else None
52
77
53
78
def patch_reference (self , target_pdf : "PdfWriter" , new_page : IndirectObject ) -> None :
54
79
"""target_pdf: PdfWriter which the new link went into"""
@@ -90,7 +115,6 @@ def extract_links(new_page: "PageObject", old_page: "PageObject") -> List[Tuple[
90
115
91
116
92
117
def _build_link (indirect_object : IndirectObject , page : "PageObject" ) -> Optional [ReferenceLink ]:
93
- src = cast ("PdfReader" , page .pdf )
94
118
link = cast (DictionaryObject , indirect_object .get_object ())
95
119
if (not isinstance (link , DictionaryObject )) or link .get ("/Subtype" ) != "/Link" :
96
120
return None
@@ -100,17 +124,17 @@ def _build_link(indirect_object: IndirectObject, page: "PageObject") -> Optional
100
124
if action .get ("/S" ) != "/GoTo" :
101
125
return None
102
126
103
- return _create_link (action ["/D" ], src )
127
+ return _create_link (action ["/D" ], page )
104
128
105
129
if "/Dest" in link :
106
- return _create_link (link ["/Dest" ], src )
130
+ return _create_link (link ["/Dest" ], page )
107
131
108
132
return None # Nothing to do here
109
133
110
134
111
def _create_link(reference: PdfObject, page: "PageObject") -> Optional[ReferenceLink]:
    """
    Wrap a link target in the matching reference-link object.

    A ``TextStringObject`` becomes a ``NamedReferenceLink`` built from the
    reference and *page*; an ``ArrayObject`` becomes a
    ``DirectReferenceLink``; any other object yields ``None``.
    """
    link: Optional[ReferenceLink] = None
    if isinstance(reference, TextStringObject):
        link = NamedReferenceLink(reference, page)
    elif isinstance(reference, ArrayObject):
        link = DirectReferenceLink(reference)
    return link
0 commit comments