Server Admin

Viewing: cat_helper.py

# -*- coding: utf-8 -*-
# Copyright 2014 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Helper for cat and cp streaming download."""

from __future__ import absolute_import
from __future__ import print_function
from __future__ import division
from __future__ import unicode_literals

import io
import sys

from boto import config

from gslib.cloud_api import EncryptionException
from gslib.exception import CommandException
from gslib.exception import NO_URLS_MATCHED_TARGET
from gslib.storage_url import StorageUrlFromString
from gslib.utils.encryption_helper import CryptoKeyWrapperFromKey
from gslib.utils.encryption_helper import FindMatchingCSEKInBotoConfig
from gslib.utils.metadata_util import ObjectIsGzipEncoded
from gslib.utils import text_util

# Object metadata fields requested from the wildcard iterator when listing
# objects to cat. Passed as `bucket_listing_fields` to IterObjects() so that
# only these fields are populated on each listed object.
_CAT_BUCKET_LISTING_FIELDS = [
    'bucket',
    'contentEncoding',
    'crc32c',
    'customerEncryption',
    'generation',
    'md5Hash',
    'name',
    'size',
]


class CatHelper(object):
  """Provides methods for the "cat" command and associated functionality."""

  def __init__(self, command_obj):
    """Initializes the helper object.

    Args:
      command_obj: gsutil command instance of calling command.
    """
    self.command_obj = command_obj

  def _WriteBytesBufferedFileToFile(self, src_fd, dst_fd):
    """Copies contents of the source to the destination via buffered IO.

    Buffered reads are necessary in the case where you're reading from a
    source that produces more data than can fit into memory all at once. This
    method does not close either file when finished.

    Args:
      src_fd: The already-open source file to read from.
      dst_fd: The already-open destination file to write to.
    """
    while True:
      buf = src_fd.read(io.DEFAULT_BUFFER_SIZE)
      if not buf:
        # An empty read signals end of input.
        break
      text_util.write_to_fd(dst_fd, buf)

  def _GetDecryptionKeyWrapper(self, blr):
    """Returns the key wrapper needed to decrypt a listed object, if any.

    Args:
      blr: Bucket listing reference for the object about to be read.

    Returns:
      None if the object's metadata carries no customer-supplied encryption
      key hash; otherwise the result of CryptoKeyWrapperFromKey() for the
      matching key found in the boto config.

    Raises:
      EncryptionException: If the object has a key hash but no matching key
          is present in the boto config.
    """
    root_object = blr.root_object
    if not (root_object and root_object.customerEncryption and
            root_object.customerEncryption.keySha256):
      return None

    key_sha256 = root_object.customerEncryption.keySha256
    decryption_key = FindMatchingCSEKInBotoConfig(key_sha256, config)
    if not decryption_key:
      raise EncryptionException(
          'Missing decryption key with SHA256 hash %s. No decryption '
          'key matches object %s' % (key_sha256, blr.url_string))
    return CryptoKeyWrapperFromKey(decryption_key)

  def CatUrlStrings(self,
                    url_strings,
                    show_header=False,
                    start_byte=0,
                    end_byte=None,
                    cat_out_fd=None):
    """Writes the contents of each matched URL to the output file.

    If the first URL is '-' or 'file://-', stdin is copied to the output and
    any remaining URLs are ignored. Otherwise each URL string is expanded via
    the command's wildcard iterator and every matching object is written in
    turn. For non-cloud (local file) URLs the whole file is copied; start_byte
    and end_byte are not applied.

    While this method runs, sys.stdout is temporarily pointed at sys.stderr so
    that only object contents reach the real stdout.

    Args:
      url_strings: Sequence of URL strings (it is indexed, so it must support
                   url_strings[0]).
      show_header: If true, print a header per file.
      start_byte: Starting byte of the file to print, used for constructing
                  range requests.
      end_byte: Ending byte of the file to print; used for constructing range
                requests. If this is negative, the start_byte is ignored and
                an end range is sent over HTTP (such as range: bytes -9)
      cat_out_fd: File descriptor to which output should be written. Defaults to
                 stdout if no file descriptor is supplied.
    Returns:
      0 on success.

    Raises:
      CommandException if no URLs can be found.
      EncryptionException if an object needs a decryption key that is not
          configured.
    """
    printed_one = False
    # This should refer to whatever sys.stdout refers to when this method is
    # run, not when this method is defined, so we do the initialization here
    # rather than define sys.stdout as the cat_out_fd parameter's default value.
    if cat_out_fd is None:
      cat_out_fd = sys.stdout
    # We manipulate the stdout so that all other data other than the Object
    # contents go to stderr.
    old_stdout = sys.stdout
    sys.stdout = sys.stderr
    try:
      if url_strings and url_strings[0] in ('-', 'file://-'):
        self._WriteBytesBufferedFileToFile(sys.stdin, cat_out_fd)
        return 0

      for url_str in url_strings:
        did_some_work = False
        # Only the metadata fields that cat needs are requested.
        for blr in self.command_obj.WildcardIterator(url_str).IterObjects(
            bucket_listing_fields=_CAT_BUCKET_LISTING_FIELDS):
          decryption_keywrapper = self._GetDecryptionKeyWrapper(blr)

          did_some_work = True
          if show_header:
            if printed_one:
              print()
            print('==> %s <==' % blr)
            printed_one = True
          cat_object = blr.root_object
          # Output nothing and raise no error if the requested range starts
          # past the end of this object. Only this object is skipped; any
          # remaining objects and URLs are still processed.
          object_size = getattr(cat_object, 'size', None)
          if object_size is not None and 0 < object_size <= start_byte:
            continue
          storage_url = StorageUrlFromString(blr.url_string)
          if storage_url.IsCloudUrl():
            compressed_encoding = ObjectIsGzipEncoded(cat_object)
            self.command_obj.gsutil_api.GetObjectMedia(
                cat_object.bucket,
                cat_object.name,
                cat_out_fd,
                compressed_encoding=compressed_encoding,
                start_byte=start_byte,
                end_byte=end_byte,
                object_size=cat_object.size,
                generation=storage_url.generation,
                decryption_tuple=decryption_keywrapper,
                provider=storage_url.scheme)
            cat_out_fd.flush()
          else:
            with open(storage_url.object_name, 'rb') as f:
              self._WriteBytesBufferedFileToFile(f, cat_out_fd)
        if not did_some_work:
          raise CommandException(NO_URLS_MATCHED_TARGET % url_str)
    finally:
      # Always restore the caller's stdout, even on error or early return.
      sys.stdout = old_stdout

    return 0
Back to File Manager