Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
52 changes: 21 additions & 31 deletions pycdlib/dr.py
Original file line number Diff line number Diff line change
Expand Up @@ -164,7 +164,7 @@ class DirectoryRecord:
'index_in_parent', 'dr_len', 'xattr_len', 'file_flags',
'file_unit_size', 'interleave_gap_size', 'len_fi', 'isdir',
'orig_extent_loc', 'data_length', 'seqnum', 'is_root',
'parent', 'rock_ridge', 'xa_record', 'file_ident')
'parent', 'rock_ridge', 'xa_record', 'file_ident', '_sort_key')

FILE_FLAG_EXISTENCE_BIT = 0
FILE_FLAG_DIRECTORY_BIT = 1
Expand Down Expand Up @@ -285,12 +285,20 @@ def parse(self, vd, record, parent, xa=False):

if self.is_root:
self._printable_name = '/'.encode(vd.encoding)
self._sort_key = b'\x00'
elif self.file_ident == b'\x00':
self._printable_name = '.'.encode(vd.encoding)
self._sort_key = b'\x00'
elif self.file_ident == b'\x01':
self._printable_name = '..'.encode(vd.encoding)
self._sort_key = b'\x01'
else:
self._printable_name = self.file_ident
# Bisect sort key for __lt__: dot/dotdot get b'\x00'/b'\x01' so
# they always sort first; real names get a 0xff prefix so they
# stay above dotdot even for Joliet, where UCS-2 names start
# with a 0x00 byte and would otherwise compare lower than b'\x01'.
self._sort_key = b'\xff' + self.file_ident

if self.parent is not None:
if xa:
Expand Down Expand Up @@ -514,12 +522,16 @@ def _new(self, vd, name, parent, seqnum, isdir, length, xa, date_seconds):

if self.is_root:
self._printable_name = '/'.encode(vd.encoding)
self._sort_key = b'\x00'
elif self.file_ident == b'\x00':
self._printable_name = '.'.encode(vd.encoding)
self._sort_key = b'\x00'
elif self.file_ident == b'\x01':
self._printable_name = '..'.encode(vd.encoding)
self._sort_key = b'\x01'
else:
self._printable_name = self.file_ident
self._sort_key = b'\xff' + self.file_ident

self.vd = vd

Expand Down Expand Up @@ -1255,36 +1267,14 @@ def fp_offset(self):
############# END BACKWARDS COMPATIBILITY #################################

def __lt__(self, other):
# This method is used for the bisect.insort_left() when adding a child.
# It needs to return whether self is less than other. Here we use the
# ISO9660 sorting order which is essentially:
#
# 1. The \x00 is always the 'dot' record, and is always first.
# 2. The \x01 is always the 'dotdot' record, and is always second.
# 3. Other entries are sorted lexically; this does not exactly match
# the sorting method specified in Ecma-119, but does OK for now.
#
# Ecma-119 Section 9.3 specifies that we need to pad out the shorter of
# the two files with 0x20 (spaces), then compare byte-by-byte until
# they differ. However, we can more easily just do a plain bytewise
# less-than comparison, since it will always be the case that 0x20 will
# be less than any of the other allowed characters in the strings.

# self is the 'dot' record: it is less than anything except another 'dot'.
if self.file_ident == b'\x00':
if other.file_ident == b'\x00':
return False
return True
# other is 'dot' and self is not, so self cannot sort before it.
if other.file_ident == b'\x00':
return False

# self is the 'dotdot' record. An other of 'dot' has already returned
# above, so the inner check below cannot be true at this point.
# NOTE(review): this path also returns True when other is 'dotdot' too;
# confirm that is acceptable for the bisect callers.
if self.file_ident == b'\x01':
if other.file_ident == b'\x00':
return False
return True

if other.file_ident == b'\x01':
# If self.file_ident was '\x00', it would have been caught above.
return False
# Neither side is 'dot' or 'dotdot': compare the raw identifiers bytewise.
return self.file_ident < other.file_ident
# NOTE(review): everything from here to the end of the method follows an
# unconditional return, so as written it never runs. This looks like the
# old and the new body of a diff shown together; confirm which of the two
# implementations is the intended one and drop the other.
# Used by bisect.bisect_left() in _add_child(). Both records carry a
# precomputed _sort_key (set when file_ident is finalized) that
# encodes the ECMA-119 ordering: dot first, then dotdot, then real
# names — see the _sort_key assignments in parse() / _new(). Real
# names compare bytewise; that doesn't match the strict ECMA-119
# 9.3 padding rule but is equivalent here, since the pad byte 0x20
# is less than any other allowed file-identifier character.
return self._sort_key < other._sort_key

def __ne__(self, other):
# type: (object) -> bool
Expand Down
28 changes: 15 additions & 13 deletions pycdlib/pycdlib.py
Original file line number Diff line number Diff line change
Expand Up @@ -432,17 +432,17 @@ def _yield_children(rec, rr):
continue
last = fi

skip_child = False
if rr:
if child.rock_ridge is not None:
for inner_child in child.children:
if inner_child.is_dotdot():
if inner_child.rock_ridge is not None and inner_child.rock_ridge.parent_link_record_exists():
skip_child = True
break

if skip_child:
continue
# For a non-root directory, dot sits at children[0] and dotdot
# at children[1]: children are kept in DirectoryRecord.__lt__
# order, which places dot first and dotdot second, ahead of
# every real name.
# If dotdot carries a PL record this is a relocated
# destination, which is yielded via the cl_to_moved_dr path
# elsewhere — skip it here to avoid a duplicate.
if len(child.children) >= 2:
dotdot = child.children[1]
if dotdot.rock_ridge is not None and dotdot.rock_ridge.parent_link_record_exists():
continue

if child.rock_ridge.child_link_record_exists() and \
child.rock_ridge.cl_to_moved_dr is not None and \
Expand Down Expand Up @@ -489,18 +489,20 @@ def _find_dr_record_by_name(vd, path, encoding):

entry = root_dir_record

tmpdr = dr.DirectoryRecord()

while True:
child = None

thelist = entry.children
# Bisect for currpath among the real entries. Children index 0/1 are
# dot/dotdot, so we start at 2. All entries from index 2 on are real
# names that compare bytewise, matching dr.__lt__ on the real-entry
# path; comparing file_ident directly avoids materializing a scratch
# DirectoryRecord just to drive __lt__.
lo = 2
hi = len(thelist)
while lo < hi:
mid = (lo + hi) // 2
tmpdr.file_ident = currpath
if thelist[mid] < tmpdr:
if thelist[mid].file_ident < currpath:
lo = mid + 1
else:
hi = mid
Expand Down
Loading