blob: 06221e68b3505432c079d35d74a6accab376c94d [file] [log] [blame]
Tom Rini10e47792018-05-06 17:58:06 -04001// SPDX-License-Identifier: GPL-2.0+
Jorgen Lundman9b4a1f92012-07-19 20:48:25 +00002/*
3 *
4 * ZFS filesystem ported to u-boot by
5 * Jorgen Lundman <lundman at lundman.net>
6 *
7 * GRUB -- GRand Unified Bootloader
8 * Copyright (C) 1999,2000,2001,2002,2003,2004
9 * Free Software Foundation, Inc.
10 * Copyright 2004 Sun Microsystems, Inc.
Jorgen Lundman9b4a1f92012-07-19 20:48:25 +000011 */
12
Simon Glass0f2af882020-05-10 11:40:05 -060013#include <log.h>
Jorgen Lundman9b4a1f92012-07-19 20:48:25 +000014#include <malloc.h>
15#include <linux/stat.h>
16#include <linux/time.h>
17#include <linux/ctype.h>
18#include <asm/byteorder.h>
19#include "zfs_common.h"
Alejandro Mery8b773142012-10-31 08:21:33 +000020#include "div64.h"
Jorgen Lundman9b4a1f92012-07-19 20:48:25 +000021
/*
 * Block device descriptor for the ZFS code.  It has external linkage, so
 * it is visible to other translation units.
 * NOTE(review): presumably the device that zfs_devread() reads from --
 * confirm against its definition.
 */
struct blk_desc *zfs_dev_desc;
Jorgen Lundman9b4a1f92012-07-19 20:48:25 +000023
24/*
25 * The zfs plug-in routines for GRUB are:
26 *
27 * zfs_mount() - locates a valid uberblock of the root pool and reads
28 * in its MOS at the memory address MOS.
29 *
30 * zfs_open() - locates a plain file object by following the MOS
31 * and places its dnode at the memory address DNODE.
32 *
33 * zfs_read() - read in the data blocks pointed by the DNODE.
34 *
35 */
36
37#include <zfs/zfs.h>
38#include <zfs/zio.h>
39#include <zfs/dnode.h>
40#include <zfs/uberblock_impl.h>
41#include <zfs/vdev_impl.h>
42#include <zfs/zio_checksum.h>
43#include <zfs/zap_impl.h>
44#include <zfs/zap_leaf.h>
45#include <zfs/zfs_znode.h>
46#include <zfs/dmu.h>
47#include <zfs/dmu_objset.h>
48#include <zfs/sa_impl.h>
49#include <zfs/dsl_dir.h>
50#include <zfs/dsl_dataset.h>
51
52
/* Name of the pool property string "bootfs". */
#define ZPOOL_PROP_BOOTFS	"bootfs"

/*
 * Constants for nvlist manipulation (taken from nvpair.h).
 */

/* nvlist encoding identifiers */
#define NV_ENCODE_NATIVE	0
#define NV_ENCODE_XDR		1

/* nvlist byte-order identifiers */
#define NV_BIG_ENDIAN		0
#define NV_LITTLE_ENDIAN	1

/* nvpair data type codes */
#define DATA_TYPE_UINT64	8
#define DATA_TYPE_STRING	9
#define DATA_TYPE_NVLIST	19
#define DATA_TYPE_NVLIST_ARRAY	20
67
68
/*
 * Macros to get fields in a bp or DVA.
 */

/* Remainder of x within an align-sized unit (align must be a power of two). */
#define P2PHASE(x, align)		((x) & ((align) - 1))

/*
 * Convert a DVA byte offset into a sector number: add
 * VDEV_LABEL_START_SIZE, then shift right by SPA_MINBLOCKSHIFT.
 * The argument is parenthesized so that any expression can be passed.
 */
#define DVA_OFFSET_TO_PHYS_SECTOR(offset) \
	(((offset) + VDEV_LABEL_START_SIZE) >> SPA_MINBLOCKSHIFT)

/*
 * return x rounded down to an align boundary
 * eg, P2ALIGN(1200, 1024) == 1024 (1*align)
 * eg, P2ALIGN(1024, 1024) == 1024 (1*align)
 * eg, P2ALIGN(0x1234, 0x100) == 0x1200 (0x12*align)
 * eg, P2ALIGN(0x5600, 0x100) == 0x5600 (0x56*align)
 */
#define P2ALIGN(x, align)		((x) & -(align))
84
/*
 * FAT ZAP data structures
 */
#define ZFS_CRC64_POLY	0xC96C5795D7870F42ULL	/* ECMA-182, reflected form */
/* Top n bits of a 64-bit hash; n == 0 is special-cased to avoid a 64-bit shift. */
#define ZAP_HASH_IDX(hash, n)	(((n) == 0) ? 0 : ((hash) >> (64 - (n))))
#define CHAIN_END	0xffff	/* end of the chunk chain */

/*
 * The amount of space within the chunk available for the array is:
 * chunk size - space for type (1) - space for next pointer (2)
 */
#define ZAP_LEAF_ARRAY_BYTES	(ZAP_LEAF_CHUNKSIZE - 3)

#define ZAP_LEAF_HASH_SHIFT(bs)		((bs) - 5)
#define ZAP_LEAF_HASH_NUMENTRIES(bs)	(1 << ZAP_LEAF_HASH_SHIFT(bs))
/*
 * Index into a leaf's l_hash[] for hash value h.
 * NOTE: this macro reads l->l_hdr.lh_prefix_len, i.e. it requires a
 * variable named `l` (the leaf) to be in scope at the point of use.
 */
#define LEAF_HASH(bs, h) \
	((ZAP_LEAF_HASH_NUMENTRIES(bs) - 1) & \
	 ((h) >> (64 - ZAP_LEAF_HASH_SHIFT(bs) - l->l_hdr.lh_prefix_len)))

/*
 * The amount of space available for chunks is:
 * block size shift - hash entry size (2) * number of hash
 * entries - header space (2*chunksize)
 */
#define ZAP_LEAF_NUMCHUNKS(bs) \
	(((1 << (bs)) - 2 * ZAP_LEAF_HASH_NUMENTRIES(bs)) / \
	 ZAP_LEAF_CHUNKSIZE - 2)

/*
 * The chunks start immediately after the hash table. The end of the
 * hash table is at l_hash + HASH_NUMENTRIES, which we simply cast to a
 * chunk_t.
 */
#define ZAP_LEAF_CHUNK(l, bs, idx) \
	(((zap_leaf_chunk_t *)((l)->l_hash + ZAP_LEAF_HASH_NUMENTRIES(bs)))[(idx)])
#define ZAP_LEAF_ENTRY(l, bs, idx)	(&ZAP_LEAF_CHUNK(l, bs, idx).l_entry)
121
122
/*
 * Decompression Entry - lzjb
 */

/* Fallback definition, used only when no earlier header provides NBBY. */
#if !defined(NBBY)
#define NBBY	8
#endif
129
130
131
/*
 * Signature of a block decompressor: takes a source buffer of s_len bytes
 * and a destination buffer of d_len bytes.  The caller in this file
 * (zio_read) treats any non-zero return value as an error.
 */
typedef int zfs_decomp_func_t(void *s_start, void *d_start,
			      uint32_t s_len, uint32_t d_len);

/* One slot of the decompression table, indexed by compression code. */
typedef struct decomp_entry {
	char *name;			/* printable algorithm name */
	zfs_decomp_func_t *decomp_func;	/* NULL when no decompressor exists */
} decomp_entry_t;
138
139typedef struct dnode_end {
140 dnode_phys_t dn;
141 zfs_endian_t endian;
142} dnode_end_t;
143
144struct zfs_data {
145 /* cache for a file block of the currently zfs_open()-ed file */
146 char *file_buf;
147 uint64_t file_start;
148 uint64_t file_end;
149
150 /* XXX: ashift is per vdev, not per pool. We currently only ever touch
151 * a single vdev, but when/if raid-z or stripes are supported, this
152 * may need revision.
153 */
154 uint64_t vdev_ashift;
155 uint64_t label_txg;
156 uint64_t pool_guid;
157
158 /* cache for a dnode block */
159 dnode_phys_t *dnode_buf;
160 dnode_phys_t *dnode_mdn;
161 uint64_t dnode_start;
162 uint64_t dnode_end;
163 zfs_endian_t dnode_endian;
164
165 uberblock_t current_uberblock;
166
167 dnode_end_t mos;
168 dnode_end_t mdn;
169 dnode_end_t dnode;
170
171 uint64_t vdev_phys_sector;
172
173 int (*userhook)(const char *, const struct zfs_dirhook_info *);
174 struct zfs_dirhook_info *dirinfo;
175
176};
177
178
179
180
181static int
182zlib_decompress(void *s, void *d,
183 uint32_t slen, uint32_t dlen)
184{
185 if (zlib_decompress(s, d, slen, dlen) < 0)
186 return ZFS_ERR_BAD_FS;
187 return ZFS_ERR_NONE;
188}
189
190static decomp_entry_t decomp_table[ZIO_COMPRESS_FUNCTIONS] = {
191 {"inherit", NULL}, /* ZIO_COMPRESS_INHERIT */
192 {"on", lzjb_decompress}, /* ZIO_COMPRESS_ON */
193 {"off", NULL}, /* ZIO_COMPRESS_OFF */
194 {"lzjb", lzjb_decompress}, /* ZIO_COMPRESS_LZJB */
195 {"empty", NULL}, /* ZIO_COMPRESS_EMPTY */
196 {"gzip-1", zlib_decompress}, /* ZIO_COMPRESS_GZIP1 */
197 {"gzip-2", zlib_decompress}, /* ZIO_COMPRESS_GZIP2 */
198 {"gzip-3", zlib_decompress}, /* ZIO_COMPRESS_GZIP3 */
199 {"gzip-4", zlib_decompress}, /* ZIO_COMPRESS_GZIP4 */
200 {"gzip-5", zlib_decompress}, /* ZIO_COMPRESS_GZIP5 */
201 {"gzip-6", zlib_decompress}, /* ZIO_COMPRESS_GZIP6 */
202 {"gzip-7", zlib_decompress}, /* ZIO_COMPRESS_GZIP7 */
203 {"gzip-8", zlib_decompress}, /* ZIO_COMPRESS_GZIP8 */
204 {"gzip-9", zlib_decompress}, /* ZIO_COMPRESS_GZIP9 */
205};
206
207
208
209static int zio_read_data(blkptr_t *bp, zfs_endian_t endian,
210 void *buf, struct zfs_data *data);
211
212static int
213zio_read(blkptr_t *bp, zfs_endian_t endian, void **buf,
214 size_t *size, struct zfs_data *data);
215
/*
 * Integer base-2 logarithm, rounded down (same thing as highbit() - 1
 * for non-zero input).  Both 0 and 1 yield 0.
 */
static int
zfs_log2(uint64_t num)
{
	int bits;

	for (bits = 0; num > 1; num >>= 1)
		bits++;

	return bits;
}
231
232
233/* Checksum Functions */
234static void
235zio_checksum_off(const void *buf __attribute__ ((unused)),
236 uint64_t size __attribute__ ((unused)),
237 zfs_endian_t endian __attribute__ ((unused)),
238 zio_cksum_t *zcp)
239{
240 ZIO_SET_CHECKSUM(zcp, 0, 0, 0, 0);
241}
242
243/* Checksum Table and Values */
244static zio_checksum_info_t zio_checksum_table[ZIO_CHECKSUM_FUNCTIONS] = {
245 {NULL, 0, 0, "inherit"},
246 {NULL, 0, 0, "on"},
247 {zio_checksum_off, 0, 0, "off"},
248 {zio_checksum_SHA256, 1, 1, "label"},
249 {zio_checksum_SHA256, 1, 1, "gang_header"},
250 {NULL, 0, 0, "zilog"},
251 {fletcher_2_endian, 0, 0, "fletcher2"},
252 {fletcher_4_endian, 1, 0, "fletcher4"},
253 {zio_checksum_SHA256, 1, 0, "SHA256"},
254 {NULL, 0, 0, "zilog2"},
255};
256
257/*
258 * zio_checksum_verify: Provides support for checksum verification.
259 *
260 * Fletcher2, Fletcher4, and SHA256 are supported.
261 *
262 */
263static int
264zio_checksum_verify(zio_cksum_t zc, uint32_t checksum,
265 zfs_endian_t endian, char *buf, int size)
266{
267 zio_eck_t *zec = (zio_eck_t *) (buf + size) - 1;
268 zio_checksum_info_t *ci = &zio_checksum_table[checksum];
269 zio_cksum_t actual_cksum, expected_cksum;
270
271 if (checksum >= ZIO_CHECKSUM_FUNCTIONS || ci->ci_func == NULL) {
272 printf("zfs unknown checksum function %d\n", checksum);
273 return ZFS_ERR_NOT_IMPLEMENTED_YET;
274 }
275
276 if (ci->ci_eck) {
277 expected_cksum = zec->zec_cksum;
278 zec->zec_cksum = zc;
279 ci->ci_func(buf, size, endian, &actual_cksum);
280 zec->zec_cksum = expected_cksum;
281 zc = expected_cksum;
282 } else {
283 ci->ci_func(buf, size, endian, &actual_cksum);
284 }
285
286 if ((actual_cksum.zc_word[0] != zc.zc_word[0])
287 || (actual_cksum.zc_word[1] != zc.zc_word[1])
288 || (actual_cksum.zc_word[2] != zc.zc_word[2])
289 || (actual_cksum.zc_word[3] != zc.zc_word[3])) {
290 return ZFS_ERR_BAD_FS;
291 }
292
293 return ZFS_ERR_NONE;
294}
295
296/*
297 * vdev_uberblock_compare takes two uberblock structures and returns an integer
298 * indicating the more recent of the two.
299 * Return Value = 1 if ub2 is more recent
300 * Return Value = -1 if ub1 is more recent
301 * The most recent uberblock is determined using its transaction number and
302 * timestamp. The uberblock with the highest transaction number is
303 * considered "newer". If the transaction numbers of the two blocks match, the
304 * timestamps are compared to determine the "newer" of the two.
305 */
306static int
307vdev_uberblock_compare(uberblock_t *ub1, uberblock_t *ub2)
308{
309 zfs_endian_t ub1_endian, ub2_endian;
310 if (zfs_to_cpu64(ub1->ub_magic, LITTLE_ENDIAN) == UBERBLOCK_MAGIC)
311 ub1_endian = LITTLE_ENDIAN;
312 else
313 ub1_endian = BIG_ENDIAN;
314 if (zfs_to_cpu64(ub2->ub_magic, LITTLE_ENDIAN) == UBERBLOCK_MAGIC)
315 ub2_endian = LITTLE_ENDIAN;
316 else
317 ub2_endian = BIG_ENDIAN;
318
319 if (zfs_to_cpu64(ub1->ub_txg, ub1_endian)
320 < zfs_to_cpu64(ub2->ub_txg, ub2_endian))
321 return -1;
322 if (zfs_to_cpu64(ub1->ub_txg, ub1_endian)
323 > zfs_to_cpu64(ub2->ub_txg, ub2_endian))
324 return 1;
325
326 if (zfs_to_cpu64(ub1->ub_timestamp, ub1_endian)
327 < zfs_to_cpu64(ub2->ub_timestamp, ub2_endian))
328 return -1;
329 if (zfs_to_cpu64(ub1->ub_timestamp, ub1_endian)
330 > zfs_to_cpu64(ub2->ub_timestamp, ub2_endian))
331 return 1;
332
333 return 0;
334}
335
WHR54ef5252024-05-01 00:28:32 +0800336static inline int
337is_supported_spa_version(uint64_t version) {
338 return version == FEATURES_SUPPORTED_SPA_VERSION ||
339 (version > 0 && version <= SPA_VERSION);
340}
341
Jorgen Lundman9b4a1f92012-07-19 20:48:25 +0000342/*
343 * Three pieces of information are needed to verify an uberblock: the magic
344 * number, the version number, and the checksum.
345 *
346 * Currently Implemented: version number, magic number, label txg
347 * Need to Implement: checksum
348 *
349 */
350static int
351uberblock_verify(uberblock_t *uber, int offset, struct zfs_data *data)
352{
353 int err;
354 zfs_endian_t endian = UNKNOWN_ENDIAN;
355 zio_cksum_t zc;
356
357 if (uber->ub_txg < data->label_txg) {
358 debug("ignoring partially written label: uber_txg < label_txg %llu %llu\n",
359 uber->ub_txg, data->label_txg);
360 return ZFS_ERR_BAD_FS;
361 }
362
WHR54ef5252024-05-01 00:28:32 +0800363 if (zfs_to_cpu64(uber->ub_magic, LITTLE_ENDIAN) == UBERBLOCK_MAGIC &&
364 is_supported_spa_version(zfs_to_cpu64(uber->ub_version, LITTLE_ENDIAN)))
Jorgen Lundman9b4a1f92012-07-19 20:48:25 +0000365 endian = LITTLE_ENDIAN;
366
WHR54ef5252024-05-01 00:28:32 +0800367 if (zfs_to_cpu64(uber->ub_magic, BIG_ENDIAN) == UBERBLOCK_MAGIC &&
368 is_supported_spa_version(zfs_to_cpu64(uber->ub_version, BIG_ENDIAN)))
Jorgen Lundman9b4a1f92012-07-19 20:48:25 +0000369 endian = BIG_ENDIAN;
370
371 if (endian == UNKNOWN_ENDIAN) {
372 printf("invalid uberblock magic\n");
373 return ZFS_ERR_BAD_FS;
374 }
375
376 memset(&zc, 0, sizeof(zc));
377 zc.zc_word[0] = cpu_to_zfs64(offset, endian);
378 err = zio_checksum_verify(zc, ZIO_CHECKSUM_LABEL, endian,
379 (char *) uber, UBERBLOCK_SIZE(data->vdev_ashift));
380
381 if (!err) {
382 /* Check that the data pointed by the rootbp is usable. */
383 void *osp = NULL;
384 size_t ospsize;
385 err = zio_read(&uber->ub_rootbp, endian, &osp, &ospsize, data);
386 free(osp);
387
388 if (!err && ospsize < OBJSET_PHYS_SIZE_V14) {
389 printf("uberblock rootbp points to invalid data\n");
390 return ZFS_ERR_BAD_FS;
391 }
392 }
393
394 return err;
395}
396
397/*
398 * Find the best uberblock.
399 * Return:
400 * Success - Pointer to the best uberblock.
401 * Failure - NULL
402 */
403static uberblock_t *find_bestub(char *ub_array, struct zfs_data *data)
404{
405 const uint64_t sector = data->vdev_phys_sector;
406 uberblock_t *ubbest = NULL;
407 uberblock_t *ubnext;
408 unsigned int i, offset, pickedub = 0;
409 int err = ZFS_ERR_NONE;
410
411 const unsigned int UBCOUNT = UBERBLOCK_COUNT(data->vdev_ashift);
412 const uint64_t UBBYTES = UBERBLOCK_SIZE(data->vdev_ashift);
413
414 for (i = 0; i < UBCOUNT; i++) {
415 ubnext = (uberblock_t *) (i * UBBYTES + ub_array);
416 offset = (sector << SPA_MINBLOCKSHIFT) + VDEV_PHYS_SIZE + (i * UBBYTES);
417
418 err = uberblock_verify(ubnext, offset, data);
419 if (err)
420 continue;
421
422 if (ubbest == NULL || vdev_uberblock_compare(ubnext, ubbest) > 0) {
423 ubbest = ubnext;
424 pickedub = i;
425 }
426 }
427
428 if (ubbest)
429 debug("zfs Found best uberblock at idx %d, txg %llu\n",
430 pickedub, (unsigned long long) ubbest->ub_txg);
431
432 return ubbest;
433}
434
435static inline size_t
436get_psize(blkptr_t *bp, zfs_endian_t endian)
437{
438 return (((zfs_to_cpu64((bp)->blk_prop, endian) >> 16) & 0xffff) + 1)
439 << SPA_MINBLOCKSHIFT;
440}
441
442static uint64_t
443dva_get_offset(dva_t *dva, zfs_endian_t endian)
444{
445 return zfs_to_cpu64((dva)->dva_word[1],
446 endian) << SPA_MINBLOCKSHIFT;
447}
448
449/*
450 * Read a block of data based on the gang block address dva,
451 * and put its data in buf.
452 *
453 */
454static int
455zio_read_gang(blkptr_t *bp, zfs_endian_t endian, dva_t *dva, void *buf,
456 struct zfs_data *data)
457{
458 zio_gbh_phys_t *zio_gb;
459 uint64_t offset, sector;
460 unsigned i;
461 int err;
462 zio_cksum_t zc;
463
464 memset(&zc, 0, sizeof(zc));
465
466 zio_gb = malloc(SPA_GANGBLOCKSIZE);
467 if (!zio_gb)
468 return ZFS_ERR_OUT_OF_MEMORY;
469
470 offset = dva_get_offset(dva, endian);
471 sector = DVA_OFFSET_TO_PHYS_SECTOR(offset);
472
473 /* read in the gang block header */
474 err = zfs_devread(sector, 0, SPA_GANGBLOCKSIZE, (char *) zio_gb);
475
476 if (err) {
477 free(zio_gb);
478 return err;
479 }
480
481 /* XXX */
482 /* self checksuming the gang block header */
483 ZIO_SET_CHECKSUM(&zc, DVA_GET_VDEV(dva),
484 dva_get_offset(dva, endian), bp->blk_birth, 0);
485 err = zio_checksum_verify(zc, ZIO_CHECKSUM_GANG_HEADER, endian,
486 (char *) zio_gb, SPA_GANGBLOCKSIZE);
487 if (err) {
488 free(zio_gb);
489 return err;
490 }
491
492 endian = (zfs_to_cpu64(bp->blk_prop, endian) >> 63) & 1;
493
494 for (i = 0; i < SPA_GBH_NBLKPTRS; i++) {
495 if (zio_gb->zg_blkptr[i].blk_birth == 0)
496 continue;
497
498 err = zio_read_data(&zio_gb->zg_blkptr[i], endian, buf, data);
499 if (err) {
500 free(zio_gb);
501 return err;
502 }
503 buf = (char *) buf + get_psize(&zio_gb->zg_blkptr[i], endian);
504 }
505 free(zio_gb);
506 return ZFS_ERR_NONE;
507}
508
509/*
510 * Read in a block of raw data to buf.
511 */
512static int
513zio_read_data(blkptr_t *bp, zfs_endian_t endian, void *buf,
514 struct zfs_data *data)
515{
516 int i, psize;
517 int err = ZFS_ERR_NONE;
518
519 psize = get_psize(bp, endian);
520
521 /* pick a good dva from the block pointer */
522 for (i = 0; i < SPA_DVAS_PER_BP; i++) {
523 uint64_t offset, sector;
524
525 if (bp->blk_dva[i].dva_word[0] == 0 && bp->blk_dva[i].dva_word[1] == 0)
526 continue;
527
528 if ((zfs_to_cpu64(bp->blk_dva[i].dva_word[1], endian)>>63) & 1) {
529 err = zio_read_gang(bp, endian, &bp->blk_dva[i], buf, data);
530 } else {
531 /* read in a data block */
532 offset = dva_get_offset(&bp->blk_dva[i], endian);
533 sector = DVA_OFFSET_TO_PHYS_SECTOR(offset);
534
535 err = zfs_devread(sector, 0, psize, buf);
536 }
537
538 if (!err) {
539 /*Check the underlying checksum before we rule this DVA as "good"*/
540 uint32_t checkalgo = (zfs_to_cpu64((bp)->blk_prop, endian) >> 40) & 0xff;
541
542 err = zio_checksum_verify(bp->blk_cksum, checkalgo, endian, buf, psize);
543 if (!err)
544 return ZFS_ERR_NONE;
545 }
546
547 /* If read failed or checksum bad, reset the error. Hopefully we've got some more DVA's to try.*/
548 }
549
550 if (!err) {
551 printf("couldn't find a valid DVA\n");
552 err = ZFS_ERR_BAD_FS;
553 }
554
555 return err;
556}
557
558/*
559 * Read in a block of data, verify its checksum, decompress if needed,
560 * and put the uncompressed data in buf.
561 */
562static int
563zio_read(blkptr_t *bp, zfs_endian_t endian, void **buf,
564 size_t *size, struct zfs_data *data)
565{
566 size_t lsize, psize;
567 unsigned int comp;
568 char *compbuf = NULL;
569 int err;
570
571 *buf = NULL;
572
573 comp = (zfs_to_cpu64((bp)->blk_prop, endian)>>32) & 0xff;
574 lsize = (BP_IS_HOLE(bp) ? 0 :
575 (((zfs_to_cpu64((bp)->blk_prop, endian) & 0xffff) + 1)
576 << SPA_MINBLOCKSHIFT));
577 psize = get_psize(bp, endian);
578
579 if (size)
580 *size = lsize;
581
582 if (comp >= ZIO_COMPRESS_FUNCTIONS) {
583 printf("compression algorithm %u not supported\n", (unsigned int) comp);
584 return ZFS_ERR_NOT_IMPLEMENTED_YET;
585 }
586
587 if (comp != ZIO_COMPRESS_OFF && decomp_table[comp].decomp_func == NULL) {
588 printf("compression algorithm %s not supported\n", decomp_table[comp].name);
589 return ZFS_ERR_NOT_IMPLEMENTED_YET;
590 }
591
592 if (comp != ZIO_COMPRESS_OFF) {
593 compbuf = malloc(psize);
594 if (!compbuf)
595 return ZFS_ERR_OUT_OF_MEMORY;
596 } else {
597 compbuf = *buf = malloc(lsize);
598 }
599
600 err = zio_read_data(bp, endian, compbuf, data);
601 if (err) {
602 free(compbuf);
603 *buf = NULL;
604 return err;
605 }
606
607 if (comp != ZIO_COMPRESS_OFF) {
608 *buf = malloc(lsize);
609 if (!*buf) {
610 free(compbuf);
611 return ZFS_ERR_OUT_OF_MEMORY;
612 }
613
614 err = decomp_table[comp].decomp_func(compbuf, *buf, psize, lsize);
615 free(compbuf);
616 if (err) {
617 free(*buf);
618 *buf = NULL;
619 return err;
620 }
621 }
622
623 return ZFS_ERR_NONE;
624}
625
626/*
627 * Get the block from a block id.
628 * push the block onto the stack.
629 *
630 */
631static int
632dmu_read(dnode_end_t *dn, uint64_t blkid, void **buf,
633 zfs_endian_t *endian_out, struct zfs_data *data)
634{
635 int idx, level;
636 blkptr_t *bp_array = dn->dn.dn_blkptr;
637 int epbs = dn->dn.dn_indblkshift - SPA_BLKPTRSHIFT;
638 blkptr_t *bp;
639 void *tmpbuf = 0;
640 zfs_endian_t endian;
641 int err = ZFS_ERR_NONE;
642
643 bp = malloc(sizeof(blkptr_t));
644 if (!bp)
645 return ZFS_ERR_OUT_OF_MEMORY;
646
647 endian = dn->endian;
648 for (level = dn->dn.dn_nlevels - 1; level >= 0; level--) {
649 idx = (blkid >> (epbs * level)) & ((1 << epbs) - 1);
650 *bp = bp_array[idx];
651 if (bp_array != dn->dn.dn_blkptr) {
652 free(bp_array);
653 bp_array = 0;
654 }
655
656 if (BP_IS_HOLE(bp)) {
657 size_t size = zfs_to_cpu16(dn->dn.dn_datablkszsec,
658 dn->endian)
659 << SPA_MINBLOCKSHIFT;
660 *buf = malloc(size);
mwleeds@mailtundra.com14a9faa2024-04-06 18:47:25 -0700661 if (!*buf) {
Jorgen Lundman9b4a1f92012-07-19 20:48:25 +0000662 err = ZFS_ERR_OUT_OF_MEMORY;
663 break;
664 }
665 memset(*buf, 0, size);
666 endian = (zfs_to_cpu64(bp->blk_prop, endian) >> 63) & 1;
667 break;
668 }
669 if (level == 0) {
670 err = zio_read(bp, endian, buf, 0, data);
671 endian = (zfs_to_cpu64(bp->blk_prop, endian) >> 63) & 1;
672 break;
673 }
674 err = zio_read(bp, endian, &tmpbuf, 0, data);
675 endian = (zfs_to_cpu64(bp->blk_prop, endian) >> 63) & 1;
676 if (err)
677 break;
678 bp_array = tmpbuf;
679 }
680 if (bp_array != dn->dn.dn_blkptr)
681 free(bp_array);
682 if (endian_out)
683 *endian_out = endian;
684
685 free(bp);
686 return err;
687}
688
689/*
690 * mzap_lookup: Looks up property described by "name" and returns the value
691 * in "value".
692 */
693static int
694mzap_lookup(mzap_phys_t *zapobj, zfs_endian_t endian,
695 int objsize, char *name, uint64_t * value)
696{
697 int i, chunks;
698 mzap_ent_phys_t *mzap_ent = zapobj->mz_chunk;
699
700 chunks = objsize / MZAP_ENT_LEN - 1;
701 for (i = 0; i < chunks; i++) {
702 if (strcmp(mzap_ent[i].mze_name, name) == 0) {
703 *value = zfs_to_cpu64(mzap_ent[i].mze_value, endian);
704 return ZFS_ERR_NONE;
705 }
706 }
707
708 printf("couldn't find '%s'\n", name);
709 return ZFS_ERR_FILE_NOT_FOUND;
710}
711
712static int
713mzap_iterate(mzap_phys_t *zapobj, zfs_endian_t endian, int objsize,
714 int (*hook)(const char *name,
715 uint64_t val,
716 struct zfs_data *data),
717 struct zfs_data *data)
718{
719 int i, chunks;
720 mzap_ent_phys_t *mzap_ent = zapobj->mz_chunk;
721
722 chunks = objsize / MZAP_ENT_LEN - 1;
723 for (i = 0; i < chunks; i++) {
724 if (hook(mzap_ent[i].mze_name,
725 zfs_to_cpu64(mzap_ent[i].mze_value, endian),
726 data))
727 return 1;
728 }
729
730 return 0;
731}
732
733static uint64_t
734zap_hash(uint64_t salt, const char *name)
735{
736 static uint64_t table[256];
737 const uint8_t *cp;
738 uint8_t c;
739 uint64_t crc = salt;
740
741 if (table[128] == 0) {
Jorgen Lundman8d119d82014-11-07 10:08:35 +0900742 uint64_t *ct = NULL;
Jorgen Lundman9b4a1f92012-07-19 20:48:25 +0000743 int i, j;
744 for (i = 0; i < 256; i++) {
745 for (ct = table + i, *ct = i, j = 8; j > 0; j--)
746 *ct = (*ct >> 1) ^ (-(*ct & 1) & ZFS_CRC64_POLY);
747 }
748 }
749
750 for (cp = (const uint8_t *) name; (c = *cp) != '\0'; cp++)
751 crc = (crc >> 8) ^ table[(crc ^ c) & 0xFF];
752
753 /*
754 * Only use 28 bits, since we need 4 bits in the cookie for the
755 * collision differentiator. We MUST use the high bits, since
756 * those are the onces that we first pay attention to when
757 * chosing the bucket.
758 */
759 crc &= ~((1ULL << (64 - ZAP_HASHBITS)) - 1);
760
761 return crc;
762}
763
764/*
765 * Only to be used on 8-bit arrays.
766 * array_len is actual len in bytes (not encoded le_value_length).
767 * buf is null-terminated.
768 */
769/* XXX */
770static int
771zap_leaf_array_equal(zap_leaf_phys_t *l, zfs_endian_t endian,
772 int blksft, int chunk, int array_len, const char *buf)
773{
774 int bseen = 0;
775
776 while (bseen < array_len) {
777 struct zap_leaf_array *la = &ZAP_LEAF_CHUNK(l, blksft, chunk).l_array;
Masahiro Yamadab62b39b2014-09-18 13:28:06 +0900778 int toread = min(array_len - bseen, ZAP_LEAF_ARRAY_BYTES);
Jorgen Lundman9b4a1f92012-07-19 20:48:25 +0000779
780 if (chunk >= ZAP_LEAF_NUMCHUNKS(blksft))
781 return 0;
782
783 if (memcmp(la->la_array, buf + bseen, toread) != 0)
784 break;
785 chunk = zfs_to_cpu16(la->la_next, endian);
786 bseen += toread;
787 }
788 return (bseen == array_len);
789}
790
791/* XXX */
792static int
793zap_leaf_array_get(zap_leaf_phys_t *l, zfs_endian_t endian, int blksft,
794 int chunk, int array_len, char *buf)
795{
796 int bseen = 0;
797
798 while (bseen < array_len) {
799 struct zap_leaf_array *la = &ZAP_LEAF_CHUNK(l, blksft, chunk).l_array;
Masahiro Yamadab62b39b2014-09-18 13:28:06 +0900800 int toread = min(array_len - bseen, ZAP_LEAF_ARRAY_BYTES);
Jorgen Lundman9b4a1f92012-07-19 20:48:25 +0000801
802 if (chunk >= ZAP_LEAF_NUMCHUNKS(blksft))
803 /* Don't use errno because this error is to be ignored. */
804 return ZFS_ERR_BAD_FS;
805
806 memcpy(buf + bseen, la->la_array, toread);
807 chunk = zfs_to_cpu16(la->la_next, endian);
808 bseen += toread;
809 }
810 return ZFS_ERR_NONE;
811}
812
813
814/*
815 * Given a zap_leaf_phys_t, walk thru the zap leaf chunks to get the
816 * value for the property "name".
817 *
818 */
819/* XXX */
820static int
821zap_leaf_lookup(zap_leaf_phys_t *l, zfs_endian_t endian,
822 int blksft, uint64_t h,
823 const char *name, uint64_t *value)
824{
825 uint16_t chunk;
826 struct zap_leaf_entry *le;
827
828 /* Verify if this is a valid leaf block */
829 if (zfs_to_cpu64(l->l_hdr.lh_block_type, endian) != ZBT_LEAF) {
830 printf("invalid leaf type\n");
831 return ZFS_ERR_BAD_FS;
832 }
833 if (zfs_to_cpu32(l->l_hdr.lh_magic, endian) != ZAP_LEAF_MAGIC) {
834 printf("invalid leaf magic\n");
835 return ZFS_ERR_BAD_FS;
836 }
837
838 for (chunk = zfs_to_cpu16(l->l_hash[LEAF_HASH(blksft, h)], endian);
839 chunk != CHAIN_END; chunk = le->le_next) {
840
841 if (chunk >= ZAP_LEAF_NUMCHUNKS(blksft)) {
842 printf("invalid chunk number\n");
843 return ZFS_ERR_BAD_FS;
844 }
845
846 le = ZAP_LEAF_ENTRY(l, blksft, chunk);
847
848 /* Verify the chunk entry */
849 if (le->le_type != ZAP_CHUNK_ENTRY) {
850 printf("invalid chunk entry\n");
851 return ZFS_ERR_BAD_FS;
852 }
853
854 if (zfs_to_cpu64(le->le_hash, endian) != h)
855 continue;
856
857 if (zap_leaf_array_equal(l, endian, blksft,
858 zfs_to_cpu16(le->le_name_chunk, endian),
859 zfs_to_cpu16(le->le_name_length, endian),
860 name)) {
861 struct zap_leaf_array *la;
862
863 if (le->le_int_size != 8 || le->le_value_length != 1) {
864 printf("invalid leaf chunk entry\n");
865 return ZFS_ERR_BAD_FS;
866 }
867 /* get the uint64_t property value */
868 la = &ZAP_LEAF_CHUNK(l, blksft, le->le_value_chunk).l_array;
869
870 *value = be64_to_cpu(la->la_array64);
871
872 return ZFS_ERR_NONE;
873 }
874 }
875
876 printf("couldn't find '%s'\n", name);
877 return ZFS_ERR_FILE_NOT_FOUND;
878}
879
880
881/* Verify if this is a fat zap header block */
882static int
883zap_verify(zap_phys_t *zap)
884{
885 if (zap->zap_magic != (uint64_t) ZAP_MAGIC) {
886 printf("bad ZAP magic\n");
887 return ZFS_ERR_BAD_FS;
888 }
889
890 if (zap->zap_flags != 0) {
891 printf("bad ZAP flags\n");
892 return ZFS_ERR_BAD_FS;
893 }
894
895 if (zap->zap_salt == 0) {
896 printf("bad ZAP salt\n");
897 return ZFS_ERR_BAD_FS;
898 }
899
900 return ZFS_ERR_NONE;
901}
902
903/*
904 * Fat ZAP lookup
905 *
906 */
907/* XXX */
908static int
909fzap_lookup(dnode_end_t *zap_dnode, zap_phys_t *zap,
910 char *name, uint64_t *value, struct zfs_data *data)
911{
912 void *l;
913 uint64_t hash, idx, blkid;
914 int blksft = zfs_log2(zfs_to_cpu16(zap_dnode->dn.dn_datablkszsec,
915 zap_dnode->endian) << DNODE_SHIFT);
916 int err;
917 zfs_endian_t leafendian;
918
919 err = zap_verify(zap);
920 if (err)
921 return err;
922
923 hash = zap_hash(zap->zap_salt, name);
924
925 /* get block id from index */
926 if (zap->zap_ptrtbl.zt_numblks != 0) {
927 printf("external pointer tables not supported\n");
928 return ZFS_ERR_NOT_IMPLEMENTED_YET;
929 }
930 idx = ZAP_HASH_IDX(hash, zap->zap_ptrtbl.zt_shift);
931 blkid = ((uint64_t *) zap)[idx + (1 << (blksft - 3 - 1))];
932
933 /* Get the leaf block */
934 if ((1U << blksft) < sizeof(zap_leaf_phys_t)) {
935 printf("ZAP leaf is too small\n");
936 return ZFS_ERR_BAD_FS;
937 }
938 err = dmu_read(zap_dnode, blkid, &l, &leafendian, data);
939 if (err)
940 return err;
941
942 err = zap_leaf_lookup(l, leafendian, blksft, hash, name, value);
943 free(l);
944 return err;
945}
946
947/* XXX */
948static int
949fzap_iterate(dnode_end_t *zap_dnode, zap_phys_t *zap,
950 int (*hook)(const char *name,
951 uint64_t val,
952 struct zfs_data *data),
953 struct zfs_data *data)
954{
955 zap_leaf_phys_t *l;
956 void *l_in;
957 uint64_t idx, blkid;
958 uint16_t chunk;
959 int blksft = zfs_log2(zfs_to_cpu16(zap_dnode->dn.dn_datablkszsec,
960 zap_dnode->endian) << DNODE_SHIFT);
961 int err;
962 zfs_endian_t endian;
963
964 if (zap_verify(zap))
965 return 0;
966
967 /* get block id from index */
968 if (zap->zap_ptrtbl.zt_numblks != 0) {
969 printf("external pointer tables not supported\n");
970 return 0;
971 }
972 /* Get the leaf block */
973 if ((1U << blksft) < sizeof(zap_leaf_phys_t)) {
974 printf("ZAP leaf is too small\n");
975 return 0;
976 }
977 for (idx = 0; idx < zap->zap_ptrtbl.zt_numblks; idx++) {
978 blkid = ((uint64_t *) zap)[idx + (1 << (blksft - 3 - 1))];
979
980 err = dmu_read(zap_dnode, blkid, &l_in, &endian, data);
981 l = l_in;
982 if (err)
983 continue;
984
985 /* Verify if this is a valid leaf block */
986 if (zfs_to_cpu64(l->l_hdr.lh_block_type, endian) != ZBT_LEAF) {
987 free(l);
988 continue;
989 }
990 if (zfs_to_cpu32(l->l_hdr.lh_magic, endian) != ZAP_LEAF_MAGIC) {
991 free(l);
992 continue;
993 }
994
995 for (chunk = 0; chunk < ZAP_LEAF_NUMCHUNKS(blksft); chunk++) {
996 char *buf;
997 struct zap_leaf_array *la;
998 struct zap_leaf_entry *le;
999 uint64_t val;
1000 le = ZAP_LEAF_ENTRY(l, blksft, chunk);
1001
1002 /* Verify the chunk entry */
1003 if (le->le_type != ZAP_CHUNK_ENTRY)
1004 continue;
1005
1006 buf = malloc(zfs_to_cpu16(le->le_name_length, endian)
1007 + 1);
1008 if (zap_leaf_array_get(l, endian, blksft, le->le_name_chunk,
1009 le->le_name_length, buf)) {
1010 free(buf);
1011 continue;
1012 }
1013 buf[le->le_name_length] = 0;
1014
1015 if (le->le_int_size != 8
1016 || zfs_to_cpu16(le->le_value_length, endian) != 1)
1017 continue;
1018
1019 /* get the uint64_t property value */
1020 la = &ZAP_LEAF_CHUNK(l, blksft, le->le_value_chunk).l_array;
1021 val = be64_to_cpu(la->la_array64);
1022 if (hook(buf, val, data))
1023 return 1;
1024 free(buf);
1025 }
1026 }
1027 return 0;
1028}
1029
1030
1031/*
1032 * Read in the data of a zap object and find the value for a matching
1033 * property name.
1034 *
1035 */
1036static int
1037zap_lookup(dnode_end_t *zap_dnode, char *name, uint64_t *val,
1038 struct zfs_data *data)
1039{
1040 uint64_t block_type;
1041 int size;
1042 void *zapbuf;
1043 int err;
1044 zfs_endian_t endian;
1045
1046 /* Read in the first block of the zap object data. */
1047 size = zfs_to_cpu16(zap_dnode->dn.dn_datablkszsec,
1048 zap_dnode->endian) << SPA_MINBLOCKSHIFT;
1049 err = dmu_read(zap_dnode, 0, &zapbuf, &endian, data);
1050 if (err)
1051 return err;
1052 block_type = zfs_to_cpu64(*((uint64_t *) zapbuf), endian);
1053
1054 if (block_type == ZBT_MICRO) {
1055 err = (mzap_lookup(zapbuf, endian, size, name, val));
1056 free(zapbuf);
1057 return err;
1058 } else if (block_type == ZBT_HEADER) {
1059 /* this is a fat zap */
1060 err = (fzap_lookup(zap_dnode, zapbuf, name, val, data));
1061 free(zapbuf);
1062 return err;
1063 }
1064
1065 printf("unknown ZAP type\n");
Jorgen Lundman8d119d82014-11-07 10:08:35 +09001066 free(zapbuf);
Jorgen Lundman9b4a1f92012-07-19 20:48:25 +00001067 return ZFS_ERR_BAD_FS;
1068}
1069
1070static int
1071zap_iterate(dnode_end_t *zap_dnode,
1072 int (*hook)(const char *name, uint64_t val,
1073 struct zfs_data *data),
1074 struct zfs_data *data)
1075{
1076 uint64_t block_type;
1077 int size;
1078 void *zapbuf;
1079 int err;
1080 int ret;
1081 zfs_endian_t endian;
1082
1083 /* Read in the first block of the zap object data. */
1084 size = zfs_to_cpu16(zap_dnode->dn.dn_datablkszsec, zap_dnode->endian) << SPA_MINBLOCKSHIFT;
1085 err = dmu_read(zap_dnode, 0, &zapbuf, &endian, data);
1086 if (err)
1087 return 0;
1088 block_type = zfs_to_cpu64(*((uint64_t *) zapbuf), endian);
1089
1090 if (block_type == ZBT_MICRO) {
1091 ret = mzap_iterate(zapbuf, endian, size, hook, data);
1092 free(zapbuf);
1093 return ret;
1094 } else if (block_type == ZBT_HEADER) {
1095 /* this is a fat zap */
1096 ret = fzap_iterate(zap_dnode, zapbuf, hook, data);
1097 free(zapbuf);
1098 return ret;
1099 }
1100 printf("unknown ZAP type\n");
Jorgen Lundman8d119d82014-11-07 10:08:35 +09001101 free(zapbuf);
Jorgen Lundman9b4a1f92012-07-19 20:48:25 +00001102 return 0;
1103}
1104
1105
1106/*
1107 * Get the dnode of an object number from the metadnode of an object set.
1108 *
1109 * Input
1110 * mdn - metadnode to get the object dnode
1111 * objnum - object number for the object dnode
1112 * buf - data buffer that holds the returning dnode
1113 */
1114static int
1115dnode_get(dnode_end_t *mdn, uint64_t objnum, uint8_t type,
1116 dnode_end_t *buf, struct zfs_data *data)
1117{
1118 uint64_t blkid, blksz; /* the block id this object dnode is in */
1119 int epbs; /* shift of number of dnodes in a block */
1120 int idx; /* index within a block */
1121 void *dnbuf;
1122 int err;
1123 zfs_endian_t endian;
1124
1125 blksz = zfs_to_cpu16(mdn->dn.dn_datablkszsec,
1126 mdn->endian) << SPA_MINBLOCKSHIFT;
1127
1128 epbs = zfs_log2(blksz) - DNODE_SHIFT;
1129 blkid = objnum >> epbs;
1130 idx = objnum & ((1 << epbs) - 1);
1131
1132 if (data->dnode_buf != NULL && memcmp(data->dnode_mdn, mdn,
1133 sizeof(*mdn)) == 0
1134 && objnum >= data->dnode_start && objnum < data->dnode_end) {
1135 memmove(&(buf->dn), &(data->dnode_buf)[idx], DNODE_SIZE);
1136 buf->endian = data->dnode_endian;
1137 if (type && buf->dn.dn_type != type) {
1138 printf("incorrect dnode type: %02X != %02x\n", buf->dn.dn_type, type);
1139 return ZFS_ERR_BAD_FS;
1140 }
1141 return ZFS_ERR_NONE;
1142 }
1143
1144 err = dmu_read(mdn, blkid, &dnbuf, &endian, data);
1145 if (err)
1146 return err;
1147
1148 free(data->dnode_buf);
1149 free(data->dnode_mdn);
1150 data->dnode_mdn = malloc(sizeof(*mdn));
1151 if (!data->dnode_mdn) {
1152 data->dnode_buf = 0;
1153 } else {
1154 memcpy(data->dnode_mdn, mdn, sizeof(*mdn));
1155 data->dnode_buf = dnbuf;
1156 data->dnode_start = blkid << epbs;
1157 data->dnode_end = (blkid + 1) << epbs;
1158 data->dnode_endian = endian;
1159 }
1160
1161 memmove(&(buf->dn), (dnode_phys_t *) dnbuf + idx, DNODE_SIZE);
1162 buf->endian = endian;
1163 if (type && buf->dn.dn_type != type) {
1164 printf("incorrect dnode type\n");
1165 return ZFS_ERR_BAD_FS;
1166 }
1167
1168 return ZFS_ERR_NONE;
1169}
1170
1171/*
1172 * Get the file dnode for a given file name where mdn is the meta dnode
1173 * for this ZFS object set. When found, place the file dnode in dn.
1174 * The 'path' argument will be mangled.
1175 *
1176 */
1177static int
1178dnode_get_path(dnode_end_t *mdn, const char *path_in, dnode_end_t *dn,
1179 struct zfs_data *data)
1180{
1181 uint64_t objnum, version;
1182 char *cname, ch;
1183 int err = ZFS_ERR_NONE;
1184 char *path, *path_buf;
1185 struct dnode_chain {
1186 struct dnode_chain *next;
1187 dnode_end_t dn;
1188 };
1189 struct dnode_chain *dnode_path = 0, *dn_new, *root;
1190
1191 dn_new = malloc(sizeof(*dn_new));
1192 if (!dn_new)
1193 return ZFS_ERR_OUT_OF_MEMORY;
1194 dn_new->next = 0;
1195 dnode_path = root = dn_new;
1196
1197 err = dnode_get(mdn, MASTER_NODE_OBJ, DMU_OT_MASTER_NODE,
1198 &(dnode_path->dn), data);
1199 if (err) {
1200 free(dn_new);
1201 return err;
1202 }
1203
1204 err = zap_lookup(&(dnode_path->dn), ZPL_VERSION_STR, &version, data);
1205 if (err) {
1206 free(dn_new);
1207 return err;
1208 }
1209 if (version > ZPL_VERSION) {
1210 free(dn_new);
1211 printf("too new ZPL version\n");
1212 return ZFS_ERR_NOT_IMPLEMENTED_YET;
1213 }
1214
1215 err = zap_lookup(&(dnode_path->dn), ZFS_ROOT_OBJ, &objnum, data);
1216 if (err) {
1217 free(dn_new);
1218 return err;
1219 }
1220
1221 err = dnode_get(mdn, objnum, 0, &(dnode_path->dn), data);
1222 if (err) {
1223 free(dn_new);
1224 return err;
1225 }
1226
1227 path = path_buf = strdup(path_in);
1228 if (!path_buf) {
1229 free(dn_new);
1230 return ZFS_ERR_OUT_OF_MEMORY;
1231 }
1232
1233 while (1) {
1234 /* skip leading slashes */
1235 while (*path == '/')
1236 path++;
1237 if (!*path)
1238 break;
1239 /* get the next component name */
1240 cname = path;
1241 while (*path && *path != '/')
1242 path++;
1243 /* Skip dot. */
1244 if (cname + 1 == path && cname[0] == '.')
1245 continue;
1246 /* Handle double dot. */
1247 if (cname + 2 == path && cname[0] == '.' && cname[1] == '.') {
1248 if (dn_new->next) {
1249 dn_new = dnode_path;
1250 dnode_path = dn_new->next;
1251 free(dn_new);
1252 } else {
1253 printf("can't resolve ..\n");
1254 err = ZFS_ERR_FILE_NOT_FOUND;
1255 break;
1256 }
1257 continue;
1258 }
1259
1260 ch = *path;
1261 *path = 0; /* ensure null termination */
1262
1263 if (dnode_path->dn.dn.dn_type != DMU_OT_DIRECTORY_CONTENTS) {
1264 free(path_buf);
1265 printf("not a directory\n");
1266 return ZFS_ERR_BAD_FILE_TYPE;
1267 }
1268 err = zap_lookup(&(dnode_path->dn), cname, &objnum, data);
1269 if (err)
1270 break;
1271
1272 dn_new = malloc(sizeof(*dn_new));
1273 if (!dn_new) {
1274 err = ZFS_ERR_OUT_OF_MEMORY;
1275 break;
1276 }
1277 dn_new->next = dnode_path;
1278 dnode_path = dn_new;
1279
1280 objnum = ZFS_DIRENT_OBJ(objnum);
1281 err = dnode_get(mdn, objnum, 0, &(dnode_path->dn), data);
1282 if (err)
1283 break;
1284
1285 *path = ch;
1286 }
1287
1288 if (!err)
1289 memcpy(dn, &(dnode_path->dn), sizeof(*dn));
1290
1291 while (dnode_path) {
1292 dn_new = dnode_path->next;
1293 free(dnode_path);
1294 dnode_path = dn_new;
1295 }
1296 free(path_buf);
1297 return err;
1298}
1299
1300
1301/*
1302 * Given a MOS metadnode, get the metadnode of a given filesystem name (fsname),
1303 * e.g. pool/rootfs, or a given object number (obj), e.g. the object number
1304 * of pool/rootfs.
1305 *
1306 * If no fsname and no obj are given, return the DSL_DIR metadnode.
1307 * If fsname is given, return its metadnode and its matching object number.
1308 * If only obj is given, return the metadnode for this object number.
1309 *
1310 */
1311static int
1312get_filesystem_dnode(dnode_end_t *mosmdn, char *fsname,
1313 dnode_end_t *mdn, struct zfs_data *data)
1314{
1315 uint64_t objnum;
1316 int err;
1317
1318 err = dnode_get(mosmdn, DMU_POOL_DIRECTORY_OBJECT,
1319 DMU_OT_OBJECT_DIRECTORY, mdn, data);
1320 if (err)
1321 return err;
1322
1323 err = zap_lookup(mdn, DMU_POOL_ROOT_DATASET, &objnum, data);
1324 if (err)
1325 return err;
1326
1327 err = dnode_get(mosmdn, objnum, DMU_OT_DSL_DIR, mdn, data);
1328 if (err)
1329 return err;
1330
1331 while (*fsname) {
1332 uint64_t childobj;
1333 char *cname, ch;
1334
1335 while (*fsname == '/')
1336 fsname++;
1337
1338 if (!*fsname || *fsname == '@')
1339 break;
1340
1341 cname = fsname;
1342 while (*fsname && !isspace(*fsname) && *fsname != '/')
1343 fsname++;
1344 ch = *fsname;
1345 *fsname = 0;
1346
1347 childobj = zfs_to_cpu64((((dsl_dir_phys_t *) DN_BONUS(&mdn->dn)))->dd_child_dir_zapobj, mdn->endian);
1348 err = dnode_get(mosmdn, childobj,
1349 DMU_OT_DSL_DIR_CHILD_MAP, mdn, data);
1350 if (err)
1351 return err;
1352
1353 err = zap_lookup(mdn, cname, &objnum, data);
1354 if (err)
1355 return err;
1356
1357 err = dnode_get(mosmdn, objnum, DMU_OT_DSL_DIR, mdn, data);
1358 if (err)
1359 return err;
1360
1361 *fsname = ch;
1362 }
1363 return ZFS_ERR_NONE;
1364}
1365
1366static int
1367make_mdn(dnode_end_t *mdn, struct zfs_data *data)
1368{
1369 void *osp;
1370 blkptr_t *bp;
1371 size_t ospsize;
1372 int err;
1373
1374 bp = &(((dsl_dataset_phys_t *) DN_BONUS(&mdn->dn))->ds_bp);
1375 err = zio_read(bp, mdn->endian, &osp, &ospsize, data);
1376 if (err)
1377 return err;
1378 if (ospsize < OBJSET_PHYS_SIZE_V14) {
1379 free(osp);
1380 printf("too small osp\n");
1381 return ZFS_ERR_BAD_FS;
1382 }
1383
1384 mdn->endian = (zfs_to_cpu64(bp->blk_prop, mdn->endian)>>63) & 1;
1385 memmove((char *) &(mdn->dn),
1386 (char *) &((objset_phys_t *) osp)->os_meta_dnode, DNODE_SIZE);
1387 free(osp);
1388 return ZFS_ERR_NONE;
1389}
1390
1391static int
1392dnode_get_fullpath(const char *fullpath, dnode_end_t *mdn,
1393 uint64_t *mdnobj, dnode_end_t *dn, int *isfs,
1394 struct zfs_data *data)
1395{
1396 char *fsname, *snapname;
1397 const char *ptr_at, *filename;
1398 uint64_t headobj;
1399 int err;
1400
1401 ptr_at = strchr(fullpath, '@');
1402 if (!ptr_at) {
1403 *isfs = 1;
1404 filename = 0;
1405 snapname = 0;
1406 fsname = strdup(fullpath);
1407 } else {
1408 const char *ptr_slash = strchr(ptr_at, '/');
1409
1410 *isfs = 0;
1411 fsname = malloc(ptr_at - fullpath + 1);
1412 if (!fsname)
1413 return ZFS_ERR_OUT_OF_MEMORY;
1414 memcpy(fsname, fullpath, ptr_at - fullpath);
1415 fsname[ptr_at - fullpath] = 0;
1416 if (ptr_at[1] && ptr_at[1] != '/') {
1417 snapname = malloc(ptr_slash - ptr_at);
1418 if (!snapname) {
1419 free(fsname);
1420 return ZFS_ERR_OUT_OF_MEMORY;
1421 }
1422 memcpy(snapname, ptr_at + 1, ptr_slash - ptr_at - 1);
1423 snapname[ptr_slash - ptr_at - 1] = 0;
1424 } else {
1425 snapname = 0;
1426 }
1427 if (ptr_slash)
1428 filename = ptr_slash;
1429 else
1430 filename = "/";
1431 printf("zfs fsname = '%s' snapname='%s' filename = '%s'\n",
1432 fsname, snapname, filename);
1433 }
1434
1435
1436 err = get_filesystem_dnode(&(data->mos), fsname, dn, data);
1437
1438 if (err) {
1439 free(fsname);
1440 free(snapname);
1441 return err;
1442 }
1443
1444 headobj = zfs_to_cpu64(((dsl_dir_phys_t *) DN_BONUS(&dn->dn))->dd_head_dataset_obj, dn->endian);
1445
1446 err = dnode_get(&(data->mos), headobj, DMU_OT_DSL_DATASET, mdn, data);
1447 if (err) {
1448 free(fsname);
1449 free(snapname);
1450 return err;
1451 }
1452
1453 if (snapname) {
1454 uint64_t snapobj;
1455
1456 snapobj = zfs_to_cpu64(((dsl_dataset_phys_t *) DN_BONUS(&mdn->dn))->ds_snapnames_zapobj, mdn->endian);
1457
1458 err = dnode_get(&(data->mos), snapobj,
1459 DMU_OT_DSL_DS_SNAP_MAP, mdn, data);
1460 if (!err)
1461 err = zap_lookup(mdn, snapname, &headobj, data);
1462 if (!err)
1463 err = dnode_get(&(data->mos), headobj, DMU_OT_DSL_DATASET, mdn, data);
1464 if (err) {
1465 free(fsname);
1466 free(snapname);
1467 return err;
1468 }
1469 }
1470
1471 if (mdnobj)
1472 *mdnobj = headobj;
1473
1474 make_mdn(mdn, data);
1475
1476 if (*isfs) {
1477 free(fsname);
1478 free(snapname);
1479 return ZFS_ERR_NONE;
1480 }
1481 err = dnode_get_path(mdn, filename, dn, data);
1482 free(fsname);
1483 free(snapname);
1484 return err;
1485}
1486
1487/*
1488 * For a given XDR packed nvlist, verify the first 4 bytes and move on.
1489 *
1490 * An XDR packed nvlist is encoded as (comments from nvs_xdr_create) :
1491 *
1492 * encoding method/host endian (4 bytes)
1493 * nvl_version (4 bytes)
1494 * nvl_nvflag (4 bytes)
1495 * encoded nvpairs:
1496 * encoded size of the nvpair (4 bytes)
1497 * decoded size of the nvpair (4 bytes)
1498 * name string size (4 bytes)
1499 * name string data (sizeof(NV_ALIGN4(string))
1500 * data type (4 bytes)
1501 * # of elements in the nvpair (4 bytes)
1502 * data
1503 * 2 zero's for the last nvpair
1504 * (end of the entire list) (8 bytes)
1505 *
1506 */
1507
1508static int
1509nvlist_find_value(char *nvlist, char *name, int valtype, char **val,
1510 size_t *size_out, size_t *nelm_out)
1511{
1512 int name_len, type, encode_size;
1513 char *nvpair, *nvp_name;
1514
1515 /* Verify if the 1st and 2nd byte in the nvlist are valid. */
1516 /* NOTE: independently of what endianness header announces all
1517 subsequent values are big-endian. */
1518 if (nvlist[0] != NV_ENCODE_XDR || (nvlist[1] != NV_LITTLE_ENDIAN
1519 && nvlist[1] != NV_BIG_ENDIAN)) {
1520 printf("zfs incorrect nvlist header\n");
1521 return ZFS_ERR_BAD_FS;
1522 }
1523
1524 /* skip the header, nvl_version, and nvl_nvflag */
1525 nvlist = nvlist + 4 * 3;
1526 /*
1527 * Loop thru the nvpair list
1528 * The XDR representation of an integer is in big-endian byte order.
1529 */
1530 while ((encode_size = be32_to_cpu(*(uint32_t *) nvlist))) {
1531 int nelm;
1532
1533 nvpair = nvlist + 4 * 2; /* skip the encode/decode size */
1534
1535 name_len = be32_to_cpu(*(uint32_t *) nvpair);
1536 nvpair += 4;
1537
1538 nvp_name = nvpair;
1539 nvpair = nvpair + ((name_len + 3) & ~3); /* align */
1540
1541 type = be32_to_cpu(*(uint32_t *) nvpair);
1542 nvpair += 4;
1543
1544 nelm = be32_to_cpu(*(uint32_t *) nvpair);
1545 if (nelm < 1) {
1546 printf("empty nvpair\n");
1547 return ZFS_ERR_BAD_FS;
1548 }
1549
1550 nvpair += 4;
1551
1552 if ((strncmp(nvp_name, name, name_len) == 0) && type == valtype) {
1553 *val = nvpair;
1554 *size_out = encode_size;
1555 if (nelm_out)
1556 *nelm_out = nelm;
1557 return 1;
1558 }
1559
1560 nvlist += encode_size; /* goto the next nvpair */
1561 }
1562 return 0;
1563}
1564
/*
 * Report whether ptr is aligned to the size of a pointer.
 *
 * Returns 1 when the address is a multiple of sizeof(void *), else 0.
 */
int is_word_aligned_ptr(void *ptr)
{
	const uintptr_t addr = (uintptr_t)ptr;

	return (addr % sizeof(void *)) == 0;
}
1568
Jorgen Lundman9b4a1f92012-07-19 20:48:25 +00001569int
1570zfs_nvlist_lookup_uint64(char *nvlist, char *name, uint64_t *out)
1571{
1572 char *nvpair;
1573 size_t size;
1574 int found;
1575
1576 found = nvlist_find_value(nvlist, name, DATA_TYPE_UINT64, &nvpair, &size, 0);
1577 if (!found)
1578 return 0;
1579 if (size < sizeof(uint64_t)) {
1580 printf("invalid uint64\n");
1581 return ZFS_ERR_BAD_FS;
1582 }
1583
mwleeds@mailtundra.comf5eb1222024-04-06 18:47:27 -07001584 /* On arm64, calling be64_to_cpu() on a value stored at a memory address
1585 * that's not 8-byte aligned causes the CPU to reset. Avoid that by copying the
1586 * value somewhere else if needed.
1587 */
1588 if (!is_word_aligned_ptr((void *)nvpair)) {
1589 uint64_t *alignedptr = malloc(sizeof(uint64_t));
1590 if (!alignedptr)
1591 return 0;
1592 memcpy(alignedptr, nvpair, sizeof(uint64_t));
1593 *out = be64_to_cpu(*alignedptr);
1594 free(alignedptr);
1595 return 1;
1596 }
1597
Jorgen Lundman9b4a1f92012-07-19 20:48:25 +00001598 *out = be64_to_cpu(*(uint64_t *) nvpair);
1599 return 1;
1600}
1601
1602char *
1603zfs_nvlist_lookup_string(char *nvlist, char *name)
1604{
1605 char *nvpair;
1606 char *ret;
1607 size_t slen;
1608 size_t size;
1609 int found;
1610
1611 found = nvlist_find_value(nvlist, name, DATA_TYPE_STRING, &nvpair, &size, 0);
1612 if (!found)
1613 return 0;
1614 if (size < 4) {
1615 printf("invalid string\n");
1616 return 0;
1617 }
1618 slen = be32_to_cpu(*(uint32_t *) nvpair);
1619 if (slen > size - 4)
1620 slen = size - 4;
1621 ret = malloc(slen + 1);
1622 if (!ret)
1623 return 0;
1624 memcpy(ret, nvpair + 4, slen);
1625 ret[slen] = 0;
1626 return ret;
1627}
1628
1629char *
1630zfs_nvlist_lookup_nvlist(char *nvlist, char *name)
1631{
1632 char *nvpair;
1633 char *ret;
1634 size_t size;
1635 int found;
1636
1637 found = nvlist_find_value(nvlist, name, DATA_TYPE_NVLIST, &nvpair,
1638 &size, 0);
1639 if (!found)
1640 return 0;
mwleeds@mailtundra.com437d7882024-04-06 18:47:26 -07001641
1642 /* Allocate 12 bytes in addition to the nvlist size: One uint32 before the
1643 * nvlist to hold the encoding method, and two zero uint32's after the
1644 * nvlist as the NULL terminator.
1645 */
Jorgen Lundman9b4a1f92012-07-19 20:48:25 +00001646 ret = calloc(1, size + 3 * sizeof(uint32_t));
1647 if (!ret)
1648 return 0;
1649 memcpy(ret, nvlist, sizeof(uint32_t));
1650
1651 memcpy(ret + sizeof(uint32_t), nvpair, size);
1652 return ret;
1653}
1654
1655int
1656zfs_nvlist_lookup_nvlist_array_get_nelm(char *nvlist, char *name)
1657{
1658 char *nvpair;
1659 size_t nelm, size;
1660 int found;
1661
1662 found = nvlist_find_value(nvlist, name, DATA_TYPE_NVLIST, &nvpair,
1663 &size, &nelm);
1664 if (!found)
1665 return -1;
1666 return nelm;
1667}
1668
1669char *
1670zfs_nvlist_lookup_nvlist_array(char *nvlist, char *name,
1671 size_t index)
1672{
1673 char *nvpair, *nvpairptr;
1674 int found;
1675 char *ret;
1676 size_t size;
1677 unsigned i;
1678 size_t nelm;
1679
1680 found = nvlist_find_value(nvlist, name, DATA_TYPE_NVLIST, &nvpair,
1681 &size, &nelm);
1682 if (!found)
1683 return 0;
1684 if (index >= nelm) {
1685 printf("trying to lookup past nvlist array\n");
1686 return 0;
1687 }
1688
1689 nvpairptr = nvpair;
1690
1691 for (i = 0; i < index; i++) {
1692 uint32_t encode_size;
1693
1694 /* skip the header, nvl_version, and nvl_nvflag */
1695 nvpairptr = nvpairptr + 4 * 2;
1696
1697 while (nvpairptr < nvpair + size
1698 && (encode_size = be32_to_cpu(*(uint32_t *) nvpairptr)))
1699 nvlist += encode_size; /* goto the next nvpair */
1700
1701 nvlist = nvlist + 4 * 2; /* skip the ending 2 zeros - 8 bytes */
1702 }
1703
1704 if (nvpairptr >= nvpair + size
1705 || nvpairptr + be32_to_cpu(*(uint32_t *) (nvpairptr + 4 * 2))
1706 >= nvpair + size) {
1707 printf("incorrect nvlist array\n");
1708 return 0;
1709 }
1710
1711 ret = calloc(1, be32_to_cpu(*(uint32_t *) (nvpairptr + 4 * 2))
1712 + 3 * sizeof(uint32_t));
1713 if (!ret)
1714 return 0;
1715 memcpy(ret, nvlist, sizeof(uint32_t));
1716
1717 memcpy(ret + sizeof(uint32_t), nvpairptr, size);
1718 return ret;
1719}
1720
1721static int
1722int_zfs_fetch_nvlist(struct zfs_data *data, char **nvlist)
1723{
1724 int err;
1725
1726 *nvlist = malloc(VDEV_PHYS_SIZE);
1727 /* Read in the vdev name-value pair list (112K). */
1728 err = zfs_devread(data->vdev_phys_sector, 0, VDEV_PHYS_SIZE, *nvlist);
1729 if (err) {
1730 free(*nvlist);
1731 *nvlist = 0;
1732 return err;
1733 }
1734 return ZFS_ERR_NONE;
1735}
1736
1737/*
1738 * Check the disk label information and retrieve needed vdev name-value pairs.
1739 *
1740 */
1741static int
1742check_pool_label(struct zfs_data *data)
1743{
1744 uint64_t pool_state;
1745 char *nvlist; /* for the pool */
1746 char *vdevnvlist; /* for the vdev */
1747 uint64_t diskguid;
1748 uint64_t version;
1749 int found;
1750 int err;
1751
1752 err = int_zfs_fetch_nvlist(data, &nvlist);
1753 if (err)
1754 return err;
1755
1756 found = zfs_nvlist_lookup_uint64(nvlist, ZPOOL_CONFIG_POOL_STATE,
1757 &pool_state);
1758 if (!found) {
1759 free(nvlist);
1760 printf("zfs pool state not found\n");
1761 return ZFS_ERR_BAD_FS;
1762 }
1763
1764 if (pool_state == POOL_STATE_DESTROYED) {
1765 free(nvlist);
1766 printf("zpool is marked as destroyed\n");
1767 return ZFS_ERR_BAD_FS;
1768 }
1769
1770 data->label_txg = 0;
1771 found = zfs_nvlist_lookup_uint64(nvlist, ZPOOL_CONFIG_POOL_TXG,
1772 &data->label_txg);
1773 if (!found) {
1774 free(nvlist);
1775 printf("zfs pool txg not found\n");
1776 return ZFS_ERR_BAD_FS;
1777 }
1778
1779 /* not an active device */
1780 if (data->label_txg == 0) {
1781 free(nvlist);
1782 printf("zpool is not active\n");
1783 return ZFS_ERR_BAD_FS;
1784 }
1785
1786 found = zfs_nvlist_lookup_uint64(nvlist, ZPOOL_CONFIG_VERSION,
1787 &version);
1788 if (!found) {
1789 free(nvlist);
1790 printf("zpool config version not found\n");
1791 return ZFS_ERR_BAD_FS;
1792 }
1793
WHR54ef5252024-05-01 00:28:32 +08001794 if (!is_supported_spa_version(version)) {
Jorgen Lundman9b4a1f92012-07-19 20:48:25 +00001795 free(nvlist);
1796 printf("SPA version too new %llu > %llu\n",
1797 (unsigned long long) version,
1798 (unsigned long long) SPA_VERSION);
1799 return ZFS_ERR_NOT_IMPLEMENTED_YET;
1800 }
1801
1802 vdevnvlist = zfs_nvlist_lookup_nvlist(nvlist, ZPOOL_CONFIG_VDEV_TREE);
1803 if (!vdevnvlist) {
1804 free(nvlist);
1805 printf("ZFS config vdev tree not found\n");
1806 return ZFS_ERR_BAD_FS;
1807 }
1808
1809 found = zfs_nvlist_lookup_uint64(vdevnvlist, ZPOOL_CONFIG_ASHIFT,
1810 &data->vdev_ashift);
1811 free(vdevnvlist);
1812 if (!found) {
1813 free(nvlist);
1814 printf("ZPOOL config ashift not found\n");
1815 return ZFS_ERR_BAD_FS;
1816 }
1817
1818 found = zfs_nvlist_lookup_uint64(nvlist, ZPOOL_CONFIG_GUID, &diskguid);
1819 if (!found) {
1820 free(nvlist);
1821 printf("ZPOOL config guid not found\n");
1822 return ZFS_ERR_BAD_FS;
1823 }
1824
1825 found = zfs_nvlist_lookup_uint64(nvlist, ZPOOL_CONFIG_POOL_GUID, &data->pool_guid);
1826 if (!found) {
1827 free(nvlist);
1828 printf("ZPOOL config pool guid not found\n");
1829 return ZFS_ERR_BAD_FS;
1830 }
1831
1832 free(nvlist);
1833
1834 printf("ZFS Pool GUID: %llu (%016llx) Label: GUID: %llu (%016llx), txg: %llu, SPA v%llu, ashift: %llu\n",
1835 (unsigned long long) data->pool_guid,
1836 (unsigned long long) data->pool_guid,
1837 (unsigned long long) diskguid,
1838 (unsigned long long) diskguid,
1839 (unsigned long long) data->label_txg,
1840 (unsigned long long) version,
1841 (unsigned long long) data->vdev_ashift);
1842
1843 return ZFS_ERR_NONE;
1844}
1845
1846/*
1847 * vdev_label_start returns the physical disk offset (in bytes) of
1848 * label "l".
1849 */
1850static uint64_t vdev_label_start(uint64_t psize, int l)
1851{
1852 return (l * sizeof(vdev_label_t) + (l < VDEV_LABELS / 2 ?
1853 0 : psize -
1854 VDEV_LABELS * sizeof(vdev_label_t)));
1855}
1856
1857void
1858zfs_unmount(struct zfs_data *data)
1859{
1860 free(data->dnode_buf);
1861 free(data->dnode_mdn);
1862 free(data->file_buf);
1863 free(data);
1864}
1865
1866/*
1867 * zfs_mount() locates a valid uberblock of the root pool and read in its MOS
1868 * to the memory address MOS.
1869 *
1870 */
1871struct zfs_data *
1872zfs_mount(device_t dev)
1873{
1874 struct zfs_data *data = 0;
1875 int label = 0, bestlabel = -1;
1876 char *ub_array;
1877 uberblock_t *ubbest;
1878 uberblock_t *ubcur = NULL;
1879 void *osp = 0;
1880 size_t ospsize;
1881 int err;
1882
1883 data = malloc(sizeof(*data));
1884 if (!data)
1885 return 0;
1886 memset(data, 0, sizeof(*data));
1887
1888 ub_array = malloc(VDEV_UBERBLOCK_RING);
1889 if (!ub_array) {
1890 zfs_unmount(data);
1891 return 0;
1892 }
1893
1894 ubbest = malloc(sizeof(*ubbest));
1895 if (!ubbest) {
Jorgen Lundman8d119d82014-11-07 10:08:35 +09001896 free(ub_array);
Jorgen Lundman9b4a1f92012-07-19 20:48:25 +00001897 zfs_unmount(data);
1898 return 0;
1899 }
1900 memset(ubbest, 0, sizeof(*ubbest));
1901
1902 /*
1903 * some eltorito stacks don't give us a size and
1904 * we end up setting the size to MAXUINT, further
1905 * some of these devices stop working once a single
1906 * read past the end has been issued. Checking
1907 * for a maximum part_length and skipping the backup
1908 * labels at the end of the slice/partition/device
1909 * avoids breaking down on such devices.
1910 */
1911 const int vdevnum =
1912 dev->part_length == 0 ?
1913 VDEV_LABELS / 2 : VDEV_LABELS;
1914
1915 /* Size in bytes of the device (disk or partition) aligned to label size*/
1916 uint64_t device_size =
1917 dev->part_length << SECTOR_BITS;
1918
1919 const uint64_t alignedbytes =
1920 P2ALIGN(device_size, (uint64_t) sizeof(vdev_label_t));
1921
1922 for (label = 0; label < vdevnum; label++) {
1923 uint64_t labelstartbytes = vdev_label_start(alignedbytes, label);
1924 uint64_t labelstart = labelstartbytes >> SECTOR_BITS;
1925
1926 debug("zfs reading label %d at sector %llu (byte %llu)\n",
1927 label, (unsigned long long) labelstart,
1928 (unsigned long long) labelstartbytes);
1929
1930 data->vdev_phys_sector = labelstart +
1931 ((VDEV_SKIP_SIZE + VDEV_BOOT_HEADER_SIZE) >> SECTOR_BITS);
1932
1933 err = check_pool_label(data);
1934 if (err) {
1935 printf("zfs error checking label %d\n", label);
1936 continue;
1937 }
1938
1939 /* Read in the uberblock ring (128K). */
1940 err = zfs_devread(data->vdev_phys_sector +
1941 (VDEV_PHYS_SIZE >> SECTOR_BITS),
1942 0, VDEV_UBERBLOCK_RING, ub_array);
1943 if (err) {
1944 printf("zfs error reading uberblock ring for label %d\n", label);
1945 continue;
1946 }
1947
1948 ubcur = find_bestub(ub_array, data);
1949 if (!ubcur) {
1950 printf("zfs No good uberblocks found in label %d\n", label);
1951 continue;
1952 }
1953
1954 if (vdev_uberblock_compare(ubcur, ubbest) > 0) {
1955 /* Looks like the block is good, so use it.*/
1956 memcpy(ubbest, ubcur, sizeof(*ubbest));
1957 bestlabel = label;
1958 debug("zfs Current best uberblock found in label %d\n", label);
1959 }
1960 }
1961 free(ub_array);
1962
1963 /* We zero'd the structure to begin with. If we never assigned to it,
1964 magic will still be zero. */
1965 if (!ubbest->ub_magic) {
1966 printf("couldn't find a valid ZFS label\n");
1967 zfs_unmount(data);
1968 free(ubbest);
1969 return 0;
1970 }
1971
1972 debug("zfs ubbest %p in label %d\n", ubbest, bestlabel);
1973
1974 zfs_endian_t ub_endian =
1975 zfs_to_cpu64(ubbest->ub_magic, LITTLE_ENDIAN) == UBERBLOCK_MAGIC
1976 ? LITTLE_ENDIAN : BIG_ENDIAN;
1977
1978 debug("zfs endian set to %s\n", !ub_endian ? "big" : "little");
1979
1980 err = zio_read(&ubbest->ub_rootbp, ub_endian, &osp, &ospsize, data);
1981
1982 if (err) {
1983 printf("couldn't zio_read object directory\n");
1984 zfs_unmount(data);
Jorgen Lundman8d119d82014-11-07 10:08:35 +09001985 free(osp);
Jorgen Lundman9b4a1f92012-07-19 20:48:25 +00001986 free(ubbest);
1987 return 0;
1988 }
1989
1990 if (ospsize < OBJSET_PHYS_SIZE_V14) {
1991 printf("osp too small\n");
1992 zfs_unmount(data);
1993 free(osp);
1994 free(ubbest);
1995 return 0;
1996 }
1997
1998 /* Got the MOS. Save it at the memory addr MOS. */
1999 memmove(&(data->mos.dn), &((objset_phys_t *) osp)->os_meta_dnode, DNODE_SIZE);
2000 data->mos.endian =
2001 (zfs_to_cpu64(ubbest->ub_rootbp.blk_prop, ub_endian) >> 63) & 1;
2002 memmove(&(data->current_uberblock), ubbest, sizeof(uberblock_t));
2003
2004 free(osp);
2005 free(ubbest);
2006
2007 return data;
2008}
2009
2010int
2011zfs_fetch_nvlist(device_t dev, char **nvlist)
2012{
2013 struct zfs_data *zfs;
2014 int err;
2015
2016 zfs = zfs_mount(dev);
2017 if (!zfs)
2018 return ZFS_ERR_BAD_FS;
2019 err = int_zfs_fetch_nvlist(zfs, nvlist);
2020 zfs_unmount(zfs);
2021 return err;
2022}
2023
Jorgen Lundman9b4a1f92012-07-19 20:48:25 +00002024/*
2025 * zfs_open() locates a file in the rootpool by following the
2026 * MOS and places the dnode of the file in the memory address DNODE.
2027 */
2028int
2029zfs_open(struct zfs_file *file, const char *fsfilename)
2030{
2031 struct zfs_data *data;
2032 int err;
2033 int isfs;
2034
2035 data = zfs_mount(file->device);
2036 if (!data)
2037 return ZFS_ERR_BAD_FS;
2038
2039 err = dnode_get_fullpath(fsfilename, &(data->mdn), 0,
2040 &(data->dnode), &isfs, data);
2041 if (err) {
2042 zfs_unmount(data);
2043 return err;
2044 }
2045
2046 if (isfs) {
2047 zfs_unmount(data);
2048 printf("Missing @ or / separator\n");
2049 return ZFS_ERR_FILE_NOT_FOUND;
2050 }
2051
2052 /* We found the dnode for this file. Verify if it is a plain file. */
2053 if (data->dnode.dn.dn_type != DMU_OT_PLAIN_FILE_CONTENTS) {
2054 zfs_unmount(data);
2055 printf("not a file\n");
2056 return ZFS_ERR_BAD_FILE_TYPE;
2057 }
2058
2059 /* get the file size and set the file position to 0 */
2060
2061 /*
2062 * For DMU_OT_SA we will need to locate the SIZE attribute
2063 * attribute, which could be either in the bonus buffer
2064 * or the "spill" block.
2065 */
2066 if (data->dnode.dn.dn_bonustype == DMU_OT_SA) {
2067 void *sahdrp;
2068 int hdrsize;
2069
2070 if (data->dnode.dn.dn_bonuslen != 0) {
2071 sahdrp = (sa_hdr_phys_t *) DN_BONUS(&data->dnode.dn);
2072 } else if (data->dnode.dn.dn_flags & DNODE_FLAG_SPILL_BLKPTR) {
2073 blkptr_t *bp = &data->dnode.dn.dn_spill;
2074
2075 err = zio_read(bp, data->dnode.endian, &sahdrp, NULL, data);
2076 if (err)
2077 return err;
2078 } else {
2079 printf("filesystem is corrupt :(\n");
2080 return ZFS_ERR_BAD_FS;
2081 }
2082
2083 hdrsize = SA_HDR_SIZE(((sa_hdr_phys_t *) sahdrp));
2084 file->size = *(uint64_t *) ((char *) sahdrp + hdrsize + SA_SIZE_OFFSET);
Jorgen Lundman8d119d82014-11-07 10:08:35 +09002085 if ((data->dnode.dn.dn_bonuslen == 0) &&
2086 (data->dnode.dn.dn_flags & DNODE_FLAG_SPILL_BLKPTR))
2087 free(sahdrp);
Jorgen Lundman9b4a1f92012-07-19 20:48:25 +00002088 } else {
2089 file->size = zfs_to_cpu64(((znode_phys_t *) DN_BONUS(&data->dnode.dn))->zp_size, data->dnode.endian);
2090 }
2091
2092 file->data = data;
2093 file->offset = 0;
2094
2095 return ZFS_ERR_NONE;
2096}
2097
2098uint64_t
2099zfs_read(zfs_file_t file, char *buf, uint64_t len)
2100{
2101 struct zfs_data *data = (struct zfs_data *) file->data;
2102 int blksz, movesize;
2103 uint64_t length;
2104 int64_t red;
2105 int err;
2106
2107 if (data->file_buf == NULL) {
2108 data->file_buf = malloc(SPA_MAXBLOCKSIZE);
2109 if (!data->file_buf)
2110 return -1;
2111 data->file_start = data->file_end = 0;
2112 }
2113
2114 /*
2115 * If offset is in memory, move it into the buffer provided and return.
2116 */
2117 if (file->offset >= data->file_start
2118 && file->offset + len <= data->file_end) {
2119 memmove(buf, data->file_buf + file->offset - data->file_start,
2120 len);
2121 return len;
2122 }
2123
2124 blksz = zfs_to_cpu16(data->dnode.dn.dn_datablkszsec,
2125 data->dnode.endian) << SPA_MINBLOCKSHIFT;
2126
2127 /*
2128 * Entire Dnode is too big to fit into the space available. We
2129 * will need to read it in chunks. This could be optimized to
2130 * read in as large a chunk as there is space available, but for
2131 * now, this only reads in one data block at a time.
2132 */
2133 length = len;
2134 red = 0;
2135 while (length) {
2136 void *t;
2137 /*
2138 * Find requested blkid and the offset within that block.
2139 */
Alejandro Mery8b773142012-10-31 08:21:33 +00002140 uint64_t blkid = file->offset + red;
mwleeds@mailtundra.com67766f72024-04-06 18:47:29 -07002141 uint64_t blkoff = do_div(blkid, blksz);
Jorgen Lundman9b4a1f92012-07-19 20:48:25 +00002142 free(data->file_buf);
2143 data->file_buf = 0;
2144
2145 err = dmu_read(&(data->dnode), blkid, &t,
2146 0, data);
2147 data->file_buf = t;
2148 if (err)
2149 return -1;
2150
2151 data->file_start = blkid * blksz;
2152 data->file_end = data->file_start + blksz;
2153
Masahiro Yamadab62b39b2014-09-18 13:28:06 +09002154 movesize = min(length, data->file_end - (int)file->offset - red);
Jorgen Lundman9b4a1f92012-07-19 20:48:25 +00002155
mwleeds@mailtundra.com67766f72024-04-06 18:47:29 -07002156 memmove(buf, data->file_buf + blkoff, movesize);
Jorgen Lundman9b4a1f92012-07-19 20:48:25 +00002157 buf += movesize;
2158 length -= movesize;
2159 red += movesize;
2160 }
2161
2162 return len;
2163}
2164
2165int
2166zfs_close(zfs_file_t file)
2167{
2168 zfs_unmount((struct zfs_data *) file->data);
2169 return ZFS_ERR_NONE;
2170}
2171
2172int
2173zfs_getmdnobj(device_t dev, const char *fsfilename,
2174 uint64_t *mdnobj)
2175{
2176 struct zfs_data *data;
2177 int err;
2178 int isfs;
2179
2180 data = zfs_mount(dev);
2181 if (!data)
2182 return ZFS_ERR_BAD_FS;
2183
2184 err = dnode_get_fullpath(fsfilename, &(data->mdn), mdnobj,
2185 &(data->dnode), &isfs, data);
2186 zfs_unmount(data);
2187 return err;
2188}
2189
2190static void
2191fill_fs_info(struct zfs_dirhook_info *info,
2192 dnode_end_t mdn, struct zfs_data *data)
2193{
2194 int err;
2195 dnode_end_t dn;
2196 uint64_t objnum;
2197 uint64_t headobj;
2198
2199 memset(info, 0, sizeof(*info));
2200
2201 info->dir = 1;
2202
2203 if (mdn.dn.dn_type == DMU_OT_DSL_DIR) {
2204 headobj = zfs_to_cpu64(((dsl_dir_phys_t *) DN_BONUS(&mdn.dn))->dd_head_dataset_obj, mdn.endian);
2205
2206 err = dnode_get(&(data->mos), headobj, DMU_OT_DSL_DATASET, &mdn, data);
2207 if (err) {
2208 printf("zfs failed here 1\n");
2209 return;
2210 }
2211 }
2212 make_mdn(&mdn, data);
2213 err = dnode_get(&mdn, MASTER_NODE_OBJ, DMU_OT_MASTER_NODE,
2214 &dn, data);
2215 if (err) {
2216 printf("zfs failed here 2\n");
2217 return;
2218 }
2219
2220 err = zap_lookup(&dn, ZFS_ROOT_OBJ, &objnum, data);
2221 if (err) {
2222 printf("zfs failed here 3\n");
2223 return;
2224 }
2225
2226 err = dnode_get(&mdn, objnum, 0, &dn, data);
2227 if (err) {
2228 printf("zfs failed here 4\n");
2229 return;
2230 }
2231
2232 info->mtimeset = 1;
2233 info->mtime = zfs_to_cpu64(((znode_phys_t *) DN_BONUS(&dn.dn))->zp_mtime[0], dn.endian);
2234
2235 return;
2236}
2237
2238static int iterate_zap(const char *name, uint64_t val, struct zfs_data *data)
2239{
2240 struct zfs_dirhook_info info;
2241 dnode_end_t dn;
2242
2243 memset(&info, 0, sizeof(info));
2244
2245 dnode_get(&(data->mdn), val, 0, &dn, data);
2246 info.mtimeset = 1;
2247 info.mtime = zfs_to_cpu64(((znode_phys_t *) DN_BONUS(&dn.dn))->zp_mtime[0], dn.endian);
2248 info.dir = (dn.dn.dn_type == DMU_OT_DIRECTORY_CONTENTS);
2249 debug("zfs type=%d, name=%s\n",
2250 (int)dn.dn.dn_type, (char *)name);
2251 if (!data->userhook)
2252 return 0;
2253 return data->userhook(name, &info);
2254}
2255
2256static int iterate_zap_fs(const char *name, uint64_t val, struct zfs_data *data)
2257{
2258 struct zfs_dirhook_info info;
2259 dnode_end_t mdn;
2260 int err;
2261 err = dnode_get(&(data->mos), val, 0, &mdn, data);
2262 if (err)
2263 return 0;
2264 if (mdn.dn.dn_type != DMU_OT_DSL_DIR)
2265 return 0;
2266
2267 fill_fs_info(&info, mdn, data);
2268
2269 if (!data->userhook)
2270 return 0;
2271 return data->userhook(name, &info);
2272}
2273
2274static int iterate_zap_snap(const char *name, uint64_t val, struct zfs_data *data)
2275{
2276 struct zfs_dirhook_info info;
2277 char *name2;
2278 int ret = 0;
2279 dnode_end_t mdn;
2280 int err;
2281
2282 err = dnode_get(&(data->mos), val, 0, &mdn, data);
2283 if (err)
2284 return 0;
2285
2286 if (mdn.dn.dn_type != DMU_OT_DSL_DATASET)
2287 return 0;
2288
2289 fill_fs_info(&info, mdn, data);
2290
2291 name2 = malloc(strlen(name) + 2);
2292 name2[0] = '@';
2293 memcpy(name2 + 1, name, strlen(name) + 1);
2294 if (data->userhook)
2295 ret = data->userhook(name2, &info);
2296 free(name2);
2297 return ret;
2298}
2299
2300int
2301zfs_ls(device_t device, const char *path,
2302 int (*hook)(const char *, const struct zfs_dirhook_info *))
2303{
2304 struct zfs_data *data;
2305 int err;
2306 int isfs;
Jorgen Lundman9b4a1f92012-07-19 20:48:25 +00002307
2308 data = zfs_mount(device);
2309 if (!data)
2310 return ZFS_ERR_BAD_FS;
2311
2312 data->userhook = hook;
2313
2314 err = dnode_get_fullpath(path, &(data->mdn), 0, &(data->dnode), &isfs, data);
2315 if (err) {
2316 zfs_unmount(data);
2317 return err;
2318 }
2319 if (isfs) {
2320 uint64_t childobj, headobj;
2321 uint64_t snapobj;
2322 dnode_end_t dn;
2323 struct zfs_dirhook_info info;
2324
2325 fill_fs_info(&info, data->dnode, data);
2326 hook("@", &info);
2327
2328 childobj = zfs_to_cpu64(((dsl_dir_phys_t *) DN_BONUS(&data->dnode.dn))->dd_child_dir_zapobj, data->dnode.endian);
2329 headobj = zfs_to_cpu64(((dsl_dir_phys_t *) DN_BONUS(&data->dnode.dn))->dd_head_dataset_obj, data->dnode.endian);
2330 err = dnode_get(&(data->mos), childobj,
2331 DMU_OT_DSL_DIR_CHILD_MAP, &dn, data);
2332 if (err) {
2333 zfs_unmount(data);
2334 return err;
2335 }
2336
2337
2338 zap_iterate(&dn, iterate_zap_fs, data);
2339
2340 err = dnode_get(&(data->mos), headobj, DMU_OT_DSL_DATASET, &dn, data);
2341 if (err) {
2342 zfs_unmount(data);
2343 return err;
2344 }
2345
2346 snapobj = zfs_to_cpu64(((dsl_dataset_phys_t *) DN_BONUS(&dn.dn))->ds_snapnames_zapobj, dn.endian);
2347
2348 err = dnode_get(&(data->mos), snapobj,
2349 DMU_OT_DSL_DS_SNAP_MAP, &dn, data);
2350 if (err) {
2351 zfs_unmount(data);
2352 return err;
2353 }
2354
2355 zap_iterate(&dn, iterate_zap_snap, data);
2356 } else {
2357 if (data->dnode.dn.dn_type != DMU_OT_DIRECTORY_CONTENTS) {
2358 zfs_unmount(data);
2359 printf("not a directory\n");
2360 return ZFS_ERR_BAD_FILE_TYPE;
2361 }
2362 zap_iterate(&(data->dnode), iterate_zap, data);
2363 }
2364 zfs_unmount(data);
2365 return ZFS_ERR_NONE;
2366}