Viewing: posix_util.py
# -*- coding: utf-8 -*-
# Copyright 2016 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Helper file for POSIX methods."""
from __future__ import absolute_import
from __future__ import print_function
from __future__ import division
from __future__ import unicode_literals
from calendar import timegm
import getpass
import logging
import os
import re
import time
import six
from gslib.exception import CommandException
from gslib.tz_utc import UTC
from gslib.utils.metadata_util import CreateCustomMetadata
from gslib.utils.metadata_util import GetValueFromObjectCustomMetadata
from gslib.utils.system_util import IS_WINDOWS
from gslib.utils.unit_util import SECONDS_PER_DAY
# pylint: disable=g-import-not-at-top
if not IS_WINDOWS:
import grp
import pwd
if six.PY3:
long = int
# Metadata attribute names for POSIX attributes. These are the custom-metadata
# keys under which each attribute is read from / written to an object.
ATIME_ATTR = 'goog-reserved-file-atime'
GID_ATTR = 'goog-reserved-posix-gid'
MODE_ATTR = 'goog-reserved-posix-mode'
MTIME_ATTR = 'goog-reserved-file-mtime'
UID_ATTR = 'goog-reserved-posix-uid'
# NA_TIME takes the place of any missing or invalid atime/mtime.
NA_TIME = -1
# NA_ID is a value that takes the place of any invalid POSIX UID or GID.
NA_ID = -1
# NA_MODE is an octal value that takes the place of any invalid POSIX file mode.
NA_MODE = -0o1
# Matches a mode written as exactly three octal digits, e.g. '644'.
MODE_REGEX = re.compile('^[0-7]{3}$')
# POSIX Permissions Bit-masks
# User.
U_R = 0o400
U_W = 0o200
U_X = 0o100
# Group.
G_R = 0o040
G_W = 0o020
G_X = 0o010
# Other.
O_R = 0o004
O_W = 0o002
O_X = 0o001
# The permissions newly created files get by default. Stored as a string by
# InitializeDefaultMode(); None until that function has run.
SYSTEM_POSIX_MODE = None
# The set of group IDs that the current user is a member of. Populated by
# InitializeUserGroups(); empty until that function has run.
USER_GROUPS = set()
class POSIXAttributes(object):
  """Class to hold POSIX attributes for a file/object."""

  def __init__(self,
               atime=NA_TIME,
               mtime=NA_TIME,
               uid=NA_ID,
               gid=NA_ID,
               mode=None):
    """Constructor for POSIXAttributes class which holds relevant data.

    Args:
      atime: The access time of the file/object.
      mtime: The modification time of the file/object.
      uid: The user ID that owns the file.
      gid: The group ID that the user is in.
      mode: The POSIX permissions, given either as a raw permissions value or
          as a POSIXMode instance. Any falsy value (None, 0) is treated as
          "no mode" and stored as NA_MODE. The result is always exposed as a
          POSIXMode on self.mode.
    """
    self.atime = atime
    self.mtime = mtime
    self.uid = uid
    self.gid = gid
    if isinstance(mode, POSIXMode):
      # Already wrapped. Wrapping again would nest one POSIXMode inside
      # another and break every comparison against self.mode.permissions.
      # A fresh wrapper is built so the caller's object is not aliased.
      self.mode = POSIXMode(mode.permissions)
    else:
      self.mode = POSIXMode(mode if mode else NA_MODE)
class POSIXMode(object):
  """Thin holder for the permissions value of a file/object."""

  def __init__(self, permissions):
    """Stores the supplied permissions unchanged.

    Args:
      permissions: The permissions value to hold. No validation or conversion
          is performed.
    """
    self.permissions = permissions
def ConvertModeToBase8(mode):
  """Converts a base-10 mode integer from os.stat to base-8.

  Only the nine permission bits (user/group/other) are kept; any higher bits,
  such as the file-type bits in st_mode, are discarded.

  Args:
    mode: The mode as an integer, e.g. os.stat(path).st_mode.

  Returns:
    An int whose decimal digits are the octal digits of the permission bits,
    e.g. 0o100644 -> 644 and 0o044 -> 44.
  """
  # Mask first, then format. Slicing oct() output is not safe: on Python 3
  # oct() returns '0o44' for small values, so a three-character slice picks up
  # the 'o' prefix and int() raises ValueError.
  return int('%o' % (mode & 0o777))
def DeserializeFileAttributesFromObjectMetadata(obj_metadata, url_str):
  """Parses the POSIX attributes from the supplied metadata.

  Only atime, gid, uid and mode are read here; mtime is left at its default.
  Every value that is present but unusable is reported through one of the
  Warn* helpers and replaced by its "not available" default.

  Args:
    obj_metadata: The metadata for an object.
    url_str: File/object path that provides context if a warning is thrown.

  Returns:
    A POSIXAttribute object with the retrieved values or a default value for
    any attribute that could not be found.
  """
  posix_attrs = POSIXAttributes()
  # Parse atime.
  found, atime = GetValueFromObjectCustomMetadata(obj_metadata, ATIME_ATTR,
                                                  NA_TIME)
  try:
    atime = long(atime)
    if found and atime <= NA_TIME:
      WarnNegativeAttribute('atime', url_str)
      atime = NA_TIME
    elif atime > long(time.time()) + SECONDS_PER_DAY:
      # More than a day ahead of the local clock is treated as bogus.
      WarnFutureTimestamp('atime', url_str)
      atime = NA_TIME
  except ValueError:
    WarnInvalidValue('atime', url_str)
    atime = NA_TIME
  posix_attrs.atime = atime
  # Parse gid.
  DeserializeIDAttribute(obj_metadata, GID_ATTR, url_str, posix_attrs)
  # Parse uid.
  DeserializeIDAttribute(obj_metadata, UID_ATTR, url_str, posix_attrs)
  # Parse mode.
  found, mode = GetValueFromObjectCustomMetadata(obj_metadata, MODE_ATTR,
                                                 NA_MODE)
  if found:
    if MODE_REGEX.match(mode):
      # The regex guarantees three octal digits, so int() cannot fail here.
      # The mode is kept as a 3-digit base-8 number.
      posix_attrs.mode = POSIXMode(int(mode))
    else:
      # A malformed mode used to be dropped silently, because the old
      # ValueError handler sat behind the regex check and could never run.
      # Warn instead, as is done for the other attributes.
      WarnInvalidValue('mode', url_str)
  return posix_attrs
def SerializeFileAttributesToObjectMetadata(posix_attrs,
                                            custom_metadata,
                                            preserve_posix=False):
  """Writes the set attributes of a POSIXAttributes into custom metadata.

  An attribute is written only when it differs from its "not available"
  sentinel. Each attribute is written with its own CreateCustomMetadata call.

  Args:
    posix_attrs: A POSIXAttributes object.
    custom_metadata: A custom metadata object to serialize values into.
    preserve_posix: Whether or not to preserve POSIX attributes other than
        mtime.
  """
  # Each entry is (metadata key, value, sentinel meaning "not available").
  # mtime always goes first: rsync needs it whether or not POSIX attributes
  # are being preserved.
  candidates = [(MTIME_ATTR, posix_attrs.mtime, NA_TIME)]
  if preserve_posix:
    candidates.extend([
        (ATIME_ATTR, posix_attrs.atime, NA_TIME),
        (UID_ATTR, posix_attrs.uid, NA_ID),
        (GID_ATTR, posix_attrs.gid, NA_ID),
        (MODE_ATTR, posix_attrs.mode.permissions, NA_MODE),
    ])
  for key, value, sentinel in candidates:
    if value != sentinel:
      CreateCustomMetadata(entries={key: value},
                           custom_metadata=custom_metadata)
def DeserializeIDAttribute(obj_metadata, attr, url_str, posix_attrs):
  """Reads one ID attribute from the metadata and stores it on posix_attrs.

  The attribute on posix_attrs is named after the last dash-separated part of
  attr ('gid' or 'uid'). A value that is not an integer, or that was found but
  is not greater than NA_ID, triggers a warning and is stored as NA_ID.

  Args:
    obj_metadata: The metadata for an object.
    attr: Either GID_ATTR or UID_ATTR.
    url_str: File/object path that provides context if a warning is thrown.
    posix_attrs: POSIXAttribute object.
  """
  short_name = attr.rsplit('-', 1)[-1]
  found, raw_id = GetValueFromObjectCustomMetadata(obj_metadata, attr, NA_ID)
  try:
    parsed_id = int(raw_id)
  except ValueError:
    WarnInvalidValue(short_name, url_str)
    parsed_id = NA_ID
  else:
    if found and parsed_id <= NA_ID:
      WarnNegativeAttribute(short_name, url_str)
      parsed_id = NA_ID
  setattr(posix_attrs, short_name, parsed_id)
def NeedsPOSIXAttributeUpdate(src_atime, dst_atime, src_mtime, dst_mtime,
                              src_uid, dst_uid, src_gid, dst_gid, src_mode,
                              dst_mode):
  """Checks whether an update for any POSIX attribute is needed.

  An attribute needs an update when the source has a value for it (greater
  than its "not available" sentinel) and the destination does not. Values that
  are set on both sides are not compared with each other.

  Args:
    src_atime: The source access time.
    dst_atime: The destination access time.
    src_mtime: The source modification time.
    dst_mtime: The destination modification time.
    src_uid: The source user ID.
    dst_uid: The destination user ID.
    src_gid: The source group ID.
    dst_gid: The destination group ID.
    src_mode: The source mode.
    dst_mode: The destination mode.

  Returns:
    A tuple containing a POSIXAttribute object and a boolean for whether an
    update was needed. The object carries the source value for each attribute
    that needs updating and defaults for the rest.
  """

  def _OnlyOnSource(src_value, dst_value, sentinel):
    """Returns True if the source has the value and the destination lacks it."""
    src_has_value = src_value > sentinel
    dst_has_value = dst_value > sentinel
    return src_has_value and not dst_has_value

  copy_atime = _OnlyOnSource(src_atime, dst_atime, NA_TIME)
  copy_mtime = _OnlyOnSource(src_mtime, dst_mtime, NA_TIME)
  copy_uid = _OnlyOnSource(src_uid, dst_uid, NA_ID)
  copy_gid = _OnlyOnSource(src_gid, dst_gid, NA_ID)
  copy_mode = _OnlyOnSource(src_mode, dst_mode, NA_MODE)

  posix_attrs = POSIXAttributes()
  if copy_atime:
    posix_attrs.atime = src_atime
  if copy_mtime:
    posix_attrs.mtime = src_mtime
  if copy_uid:
    posix_attrs.uid = src_uid
  if copy_gid:
    posix_attrs.gid = src_gid
  if copy_mode:
    posix_attrs.mode.permissions = src_mode

  update_needed = (copy_atime or copy_mtime or copy_uid or copy_gid or
                   copy_mode)
  return posix_attrs, update_needed
def ValidatePOSIXMode(mode):
  """Validates whether the mode is valid.

  In order for the mode to be valid either the user, group, or other byte must
  be >= 4, i.e. at least one of the three read bits must be set.

  Args:
    mode: The mode as an integer holding the permission bits (e.g. 0o644).

  Returns:
    True if mode is non-negative and grants read access to at least one of
    user, group or other; False otherwise.
  """
  # Test the read bits directly. The previous check ran a regex over
  # oct(mode)[-3:], which on Python 3 contains the '0o' prefix for values
  # below 0o100 and so rejected readable modes such as 0o044. The explicit
  # sign check keeps NA_MODE (-1) invalid, because -1 has every bit set.
  return mode >= 0 and bool(mode & (U_R | G_R | O_R))
def ValidateFilePermissionAccess(url_str, uid=NA_ID, gid=NA_ID, mode=NA_MODE):
  """Validates that the user has file access if uid, gid, and mode are applied.

  The checks run in this order:
    1. Windows, "no POSIX attributes supplied" and "running as root" all
       validate immediately.
    2. A supplied mode must pass ValidatePOSIXMode; a missing mode falls back
       to SYSTEM_POSIX_MODE.
    3. A supplied uid/gid must exist on this system.
    4. Read access is then decided from the user bits (uid is the current
       user), the group bits (gid is in USER_GROUPS) or the other bits.

  Args:
    url_str: The path to the object for which this is validating.
    uid: A POSIX user ID.
    gid: A POSIX group ID.
    mode: A 3-digit, number representing POSIX permissions, must be in base-8.

  Returns:
    A (bool, str) tuple, True if and only if it's safe to copy the file, and a
    string containing details for the error.
  """
  # Windows doesn't use the POSIX system for file permissions, so all files will
  # validate.
  if IS_WINDOWS:
    return True, ''

  uid_present = uid > NA_ID
  # Normalise gid once so the lookup, the membership test and the '%d' in the
  # error messages all see the same integer.
  gid = int(gid)
  gid_present = gid > NA_ID
  mode_present = mode > NA_MODE
  # No need to perform validation if Posix attrs are not being preserved.
  if not (uid_present or gid_present or mode_present):
    return True, ''

  # The root user on non-Windows systems can access files regardless of their
  # permissions.
  if os.geteuid() == 0:
    return True, ''

  # mode holds octal digits written as a decimal int (e.g. 644), hence the
  # str() round trip to get the real bit pattern.
  mode_valid = ValidatePOSIXMode(int(str(mode), 8))
  if mode_present:
    if not mode_valid:
      return False, 'Mode for %s won\'t allow read access.' % url_str
  else:
    # Calculate the default mode if the mode doesn't exist.
    # Convert mode to a 3-digit, base-8 integer.
    mode = int(SYSTEM_POSIX_MODE)

  if uid_present:
    try:
      pwd.getpwuid(uid)
    except (KeyError, OverflowError):
      return (False, 'UID for %s doesn\'t exist on current system. uid: %d' %
              (url_str, uid))
  if gid_present:
    try:
      grp.getgrgid(gid)
    except (KeyError, OverflowError):
      return (False, 'GID for %s doesn\'t exist on current system. gid: %d' %
              (url_str, gid))

  # uid at this point must exist, but isn't necessarily the current user.
  # Likewise, gid must also exist at this point.
  uid_is_current_user = uid == os.getuid()

  # By this point uid and gid must exist on the system. However, the uid might
  # not match the current user's or the current user might not be a member of
  # the group identified by gid. In this case, the 'other' byte of the
  # permissions could provide sufficient access.
  mode = int(str(mode), 8)
  # Format the permission bits explicitly for messages. oct(mode)[-3:] would
  # print e.g. 'o44' on Python 3 for modes below 0o100.
  mode_text = '%03o' % (mode & 0o777)

  # Check that if the uid is not present and the gid and mode are, so that we
  # won't orphan the file. For example if the mode is set to 007, we can orphan
  # the file because the uid would default to the current user's ID and if the
  # current user wouldn't have read access or better, the file will be orphaned
  # even though they might otherwise have access through the gid or other bytes.
  if not uid_present and gid_present and mode_present and not bool(mode & U_R):
    return (False, 'Insufficient access with uid/gid/mode for %s, gid: %d, '
            'mode: %s' % (url_str, gid, mode_text))

  if uid_is_current_user:
    if mode & U_R:
      return True, ''
    return (False, 'Insufficient access with uid/gid/mode for %s, uid: %d, '
            'mode: %s' % (url_str, uid, mode_text))
  elif gid in USER_GROUPS:
    if mode & G_R:
      return True, ''
    return (False, 'Insufficient access with uid/gid/mode for %s, gid: %d, '
            'mode: %s' % (url_str, gid, mode_text))
  elif mode & O_R:
    return True, ''
  elif not uid_present and not gid_present and mode_valid:
    # Only a (valid) mode was supplied.
    return True, ''
  return False, 'There was a problem validating %s.' % url_str
def ParseAndSetPOSIXAttributes(path,
                               obj_metadata,
                               is_rsync=False,
                               preserve_posix=False):
  """Parses POSIX attributes from obj_metadata and sets them.

  Attributes will only be set if they exist in custom metadata. This function
  should only be called after ValidateFilePermissionAccess has been called for
  the specific file/object so as not to orphan files.

  Args:
    path: The local filesystem path for the file. Valid metadata attributes will
        be set for the file located at path, some attributes will only be set
        if preserve_posix is set to True.
    obj_metadata: The metadata for the file/object.
    is_rsync: Whether or not the caller is the rsync command. Used to determine
        if timeCreated should be used.
    preserve_posix: Whether or not all POSIX attributes should be set.

  Raises:
    CommandException: If obj_metadata is None, or if a stored attribute value
        cannot be converted to a number.
  """
  if obj_metadata is None:
    # This exception is meant for debugging purposes as it should never be
    # thrown unless there are unexpected code changes.
    raise CommandException('obj_metadata cannot be None for %s' % path)
  try:
    found_at, atime = GetValueFromObjectCustomMetadata(obj_metadata,
                                                       ATIME_ATTR,
                                                       default_value=NA_TIME)
    found_mt, mtime = GetValueFromObjectCustomMetadata(obj_metadata,
                                                       MTIME_ATTR,
                                                       default_value=NA_TIME)
    found_uid, uid = GetValueFromObjectCustomMetadata(obj_metadata,
                                                      UID_ATTR,
                                                      default_value=NA_ID)
    found_gid, gid = GetValueFromObjectCustomMetadata(obj_metadata,
                                                      GID_ATTR,
                                                      default_value=NA_ID)
    found_mode, mode = GetValueFromObjectCustomMetadata(obj_metadata,
                                                        MODE_ATTR,
                                                        default_value=NA_MODE)

    # --- mtime: applied even when POSIX attributes are not preserved. ---
    if found_mt:
      mtime = long(mtime)
      if not preserve_posix:
        # Keep the file's current atime and change only mtime.
        os.utime(path, (os.stat(path).st_atime, mtime))
        return
    elif is_rsync:
      # No stored mtime: use the object's creation time for both timestamps.
      mtime = ConvertDatetimeToPOSIX(obj_metadata.timeCreated)
      os.utime(path, (mtime, mtime))
    if not preserve_posix:
      return

    # --- atime/mtime: keep the on-disk value for whichever one is missing. ---
    if found_at:
      atime = long(atime)
    atime_usable = atime > NA_TIME
    mtime_usable = mtime > NA_TIME
    if atime_usable and mtime_usable:
      os.utime(path, (atime, mtime))
    elif atime_usable:
      os.utime(path, (atime, os.stat(path).st_mtime))
    elif mtime_usable:
      os.utime(path, (os.stat(path).st_atime, mtime))

    if IS_WINDOWS:
      # Windows doesn't use POSIX uid/gid/mode unlike other systems. So there's
      # no point continuing.
      return

    # --- ownership: only root can change the owning user. ---
    if found_uid and os.geteuid() == 0:
      uid = int(uid)
    else:
      uid = NA_ID
    if found_gid:
      gid = int(gid)
    uid_usable = uid > NA_ID
    gid_usable = gid > NA_ID
    if uid_usable or gid_usable:
      # -1 tells os.chown to leave that ID unchanged.
      os.chown(path, uid if uid_usable else -1, gid if gid_usable else -1)

    # --- mode: stored as octal digits, so parse it in base 8. ---
    if found_mode:
      os.chmod(path, int(str(mode), 8))
  except ValueError:
    raise CommandException('Check POSIX attribute values for %s' %
                           obj_metadata.name)
def WarnNegativeAttribute(attr_name, url_str):
  """Logs if an attribute has a negative value.

  The message is emitted at WARNING level on the root logger.

  Args:
    attr_name: The name of the attribute to log.
    url_str: The path of the file for context.
  """
  # Logger.warn is a deprecated alias; warning() is the supported spelling.
  logging.getLogger().warning('%s has a negative %s in its metadata', url_str,
                              attr_name)
def WarnInvalidValue(attr_name, url_str):
  """Logs if an attribute has an invalid value.

  The message is emitted at WARNING level on the root logger.

  Args:
    attr_name: The name of the attribute to log.
    url_str: The path of the file for context.
  """
  # Logger.warn is a deprecated alias; warning() is the supported spelling.
  logging.getLogger().warning('%s has an invalid %s in its metadata', url_str,
                              attr_name)
def WarnFutureTimestamp(attr_name, url_str):
  """Logs if a timestamp attribute is more than a day from system time.

  The message is emitted at WARNING level on the root logger.

  Args:
    attr_name: The name of the attribute to log.
    url_str: The path of the file for context.
  """
  # Logger.warn is a deprecated alias; warning() is the supported spelling.
  logging.getLogger().warning(
      '%s has an %s more than 1 day from current system'
      ' time', url_str, attr_name)
def ConvertDatetimeToPOSIX(dt):
  """Returns the POSIX timestamp of a datetime, reading its fields as UTC.

  The datetime's tzinfo is replaced with UTC (its date/time fields are not
  shifted), and the result is converted to whole seconds since the epoch. For
  more information see the UTC class.

  Args:
    dt: A Python datetime object.

  Returns:
    A POSIX timestamp according to UTC.
  """
  utc_stamped = dt.replace(tzinfo=UTC())
  seconds_since_epoch = timegm(utc_stamped.timetuple())
  return long(seconds_since_epoch)
def InitializeDefaultMode():
  """Records the default POSIX mode using os.umask.

  Sets the module-level SYSTEM_POSIX_MODE to a three-digit octal string (for
  example '644') describing the permissions a newly created file gets.
  """
  global SYSTEM_POSIX_MODE
  if IS_WINDOWS:
    # os.umask returns 0 on Windows. Below math works out to 666.
    SYSTEM_POSIX_MODE = '666'
    return
  # umask returns the permissions that should not be granted, so they must be
  # removed from the maximum set of permissions.
  max_permissions = 0o777
  # os.umask can only report the current mask by installing a new one, so set
  # a temporary value and restore the original straight away.
  current_umask = os.umask(0o177)
  os.umask(current_umask)
  mode = max_permissions & ~current_umask
  # Files are not given execute privileges by default. Therefore we need to
  # subtract one from every odd permissions value. This is done via a bitmask.
  # Format with '%03o' rather than slicing oct(): on Python 3 oct() yields
  # '0o66' for small values, so a three-character slice would store 'o66' and
  # the later int(SYSTEM_POSIX_MODE) would raise ValueError.
  SYSTEM_POSIX_MODE = '%03o' % (mode & 0o666)
def InitializeUserGroups():
  """Initializes the set of groups that the user is in.

  Should only be called if the flag for preserving POSIX attributes is set.
  Rebinds the module-level USER_GROUPS to the user's primary group ID plus the
  ID of every group that lists the user's name as a member. Does nothing on
  Windows.
  """
  global USER_GROUPS
  if IS_WINDOWS:
    return
  passwd_entry = pwd.getpwuid(os.getuid())
  login_name = passwd_entry.pw_name
  # Secondary groups.
  group_ids = set(
      group.gr_gid for group in grp.getgrall() if login_name in group.gr_mem)
  # Primary group.
  group_ids.add(passwd_entry.pw_gid)
  USER_GROUPS = group_ids
def InitializePreservePosixData():
  """Initializes POSIX data. Run once at the beginning of a copy.

  Records the default file mode first, then the current user's groups.
  """
  for initializer in (InitializeDefaultMode, InitializeUserGroups):
    initializer()
Back to File Manager