1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one
3 * or more contributor license agreements. See the NOTICE file
4 * distributed with this work for additional information
5 * regarding copyright ownership. The ASF licenses this file
6 * to you under the Apache License, Version 2.0 (the
7 * "License"); you may not use this file except in compliance
8 * with the License. You may obtain a copy of the License at
9 *
10 * http://www.apache.org/licenses/LICENSE-2.0
11 *
12 * Unless required by applicable law or agreed to in writing, software
13 * distributed under the License is distributed on an "AS IS" BASIS,
14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 * See the License for the specific language governing permissions and
16 * limitations under the License.
17 */
18 package org.apache.hadoop.hbase.io.hfile;
19
20 import java.io.ByteArrayOutputStream;
21 import java.io.IOException;
22 import java.nio.ByteBuffer;
23
24 import org.apache.commons.logging.Log;
25 import org.apache.commons.logging.LogFactory;
26 import org.apache.hadoop.fs.ChecksumException;
27 import org.apache.hadoop.hbase.classification.InterfaceAudience;
28 import org.apache.hadoop.fs.Path;
29 import org.apache.hadoop.hbase.util.ChecksumType;
30 import org.apache.hadoop.util.DataChecksum;
31
32 /**
33 * Utility methods to compute and validate checksums.
34 */
35 @InterfaceAudience.Private
36 public class ChecksumUtil {
37 public static final Log LOG = LogFactory.getLog(ChecksumUtil.class);
38
39 /** This is used to reserve space in a byte buffer */
40 private static byte[] DUMMY_VALUE = new byte[128 * HFileBlock.CHECKSUM_SIZE];
41
42 /**
43 * This is used by unit tests to make checksum failures throw an
44 * exception instead of returning null. Returning a null value from
45 * checksum validation will cause the higher layer to retry that
46 * read with hdfs-level checksums. Instead, we would like checksum
47 * failures to cause the entire unit test to fail.
48 */
49 private static boolean generateExceptions = false;
50
51 /**
52 * Generates a checksum for all the data in indata. The checksum is
53 * written to outdata.
54 * @param indata input data stream
55 * @param startOffset starting offset in the indata stream from where to
56 * compute checkums from
57 * @param endOffset ending offset in the indata stream upto
58 * which checksums needs to be computed
59 * @param outdata the output buffer where checksum values are written
60 * @param outOffset the starting offset in the outdata where the
61 * checksum values are written
62 * @param checksumType type of checksum
63 * @param bytesPerChecksum number of bytes per checksum value
64 */
65 static void generateChecksums(byte[] indata, int startOffset, int endOffset,
66 byte[] outdata, int outOffset, ChecksumType checksumType,
67 int bytesPerChecksum) throws IOException {
68
69 if (checksumType == ChecksumType.NULL) {
70 return; // No checksum for this block.
71 }
72
73 DataChecksum checksum = DataChecksum.newDataChecksum(
74 checksumType.getDataChecksumType(), bytesPerChecksum);
75
76 checksum.calculateChunkedSums(
77 ByteBuffer.wrap(indata, startOffset, endOffset - startOffset),
78 ByteBuffer.wrap(outdata, outOffset, outdata.length - outOffset));
79 }
80
81 /**
82 * Validates that the data in the specified HFileBlock matches the
83 * checksum. Generates the checksum for the data and
84 * then validate that it matches the value stored in the header.
85 * If there is a checksum mismatch, then return false. Otherwise
86 * return true.
87 * The header is extracted from the specified HFileBlock while the
88 * data-to-be-verified is extracted from 'data'.
89 */
90 static boolean validateBlockChecksum(Path path, long offset, HFileBlock block,
91 byte[] data, int hdrSize) throws IOException {
92
93 // If this is an older version of the block that does not have
94 // checksums, then return false indicating that checksum verification
95 // did not succeed. Actually, this methiod should never be called
96 // when the minorVersion is 0, thus this is a defensive check for a
97 // cannot-happen case. Since this is a cannot-happen case, it is
98 // better to return false to indicate a checksum validation failure.
99 if (!block.getHFileContext().isUseHBaseChecksum()) {
100 return false;
101 }
102
103 // Get a checksum object based on the type of checksum that is
104 // set in the HFileBlock header. A ChecksumType.NULL indicates that
105 // the caller is not interested in validating checksums, so we
106 // always return true.
107 ChecksumType cktype = ChecksumType.codeToType(block.getChecksumType());
108 if (cktype == ChecksumType.NULL) {
109 return true; // No checksum validations needed for this block.
110 }
111
112 // read in the stored value of the checksum size from the header.
113 int bytesPerChecksum = block.getBytesPerChecksum();
114
115 DataChecksum dataChecksum = DataChecksum.newDataChecksum(
116 cktype.getDataChecksumType(), bytesPerChecksum);
117 assert dataChecksum != null;
118 int sizeWithHeader = block.getOnDiskDataSizeWithHeader();
119 if (LOG.isTraceEnabled()) {
120 LOG.info("dataLength=" + data.length
121 + ", sizeWithHeader=" + sizeWithHeader
122 + ", checksumType=" + cktype.getName()
123 + ", file=" + path.toString()
124 + ", offset=" + offset
125 + ", headerSize=" + hdrSize
126 + ", bytesPerChecksum=" + bytesPerChecksum);
127 }
128 try {
129 dataChecksum.verifyChunkedSums(ByteBuffer.wrap(data, 0, sizeWithHeader),
130 ByteBuffer.wrap(data, sizeWithHeader, data.length - sizeWithHeader),
131 path.toString(), 0);
132 } catch (ChecksumException e) {
133 return false;
134 }
135 return true; // checksum is valid
136 }
137
138 /**
139 * Returns the number of bytes needed to store the checksums for
140 * a specified data size
141 * @param datasize number of bytes of data
142 * @param bytesPerChecksum number of bytes in a checksum chunk
143 * @return The number of bytes needed to store the checksum values
144 */
145 static long numBytes(long datasize, int bytesPerChecksum) {
146 return numChunks(datasize, bytesPerChecksum) *
147 HFileBlock.CHECKSUM_SIZE;
148 }
149
150 /**
151 * Returns the number of checksum chunks needed to store the checksums for
152 * a specified data size
153 * @param datasize number of bytes of data
154 * @param bytesPerChecksum number of bytes in a checksum chunk
155 * @return The number of checksum chunks
156 */
157 static long numChunks(long datasize, int bytesPerChecksum) {
158 long numChunks = datasize/bytesPerChecksum;
159 if (datasize % bytesPerChecksum != 0) {
160 numChunks++;
161 }
162 return numChunks;
163 }
164
165 /**
166 * Write dummy checksums to the end of the specified bytes array
167 * to reserve space for writing checksums later
168 * @param baos OutputStream to write dummy checkum values
169 * @param numBytes Number of bytes of data for which dummy checksums
170 * need to be generated
171 * @param bytesPerChecksum Number of bytes per checksum value
172 */
173 static void reserveSpaceForChecksums(ByteArrayOutputStream baos,
174 int numBytes, int bytesPerChecksum) throws IOException {
175 long numChunks = numChunks(numBytes, bytesPerChecksum);
176 long bytesLeft = numChunks * HFileBlock.CHECKSUM_SIZE;
177 while (bytesLeft > 0) {
178 long count = Math.min(bytesLeft, DUMMY_VALUE.length);
179 baos.write(DUMMY_VALUE, 0, (int)count);
180 bytesLeft -= count;
181 }
182 }
183
184 /**
185 * Mechanism to throw an exception in case of hbase checksum
186 * failure. This is used by unit tests only.
187 * @param value Setting this to true will cause hbase checksum
188 * verification failures to generate exceptions.
189 */
190 public static void generateExceptionForChecksumFailureForTest(boolean value) {
191 generateExceptions = value;
192 }
193 }
194