| // SPDX-License-Identifier: GPL-2.0+ |
| /* |
| * Copyright (C) 2019-20 Sean Anderson <seanga2@gmail.com> |
| */ |
| #define LOG_CATEGORY UCLASS_CLK |
| #include <kendryte/pll.h> |
| |
| #include <asm/io.h> |
| /* For DIV_ROUND_DOWN_ULL, defined in linux/kernel.h */ |
| #include <div64.h> |
| #include <dt-bindings/clock/k210-sysctl.h> |
| #include <linux/bitfield.h> |
| #include <linux/clk-provider.h> |
| #include <linux/delay.h> |
| #include <linux/err.h> |
| #include <log.h> |
| #include <serial.h> |
| /* Driver name: used for U_BOOT_DRIVER() below and passed to clk_register() */ |
| #define CLK_K210_PLL "k210_clk_pll" |
| |
| #ifdef CONFIG_CLK_K210_SET_RATE |
| static int k210_pll_enable(struct clk *clk); /* defined below; called by k210_pll_set_rate() */ |
| static int k210_pll_disable(struct clk *clk); /* defined below; called by k210_pll_set_rate() */ |
| |
| /* |
| * The PLL included with the Kendryte K210 appears to be a True Circuits, Inc. |
| * General-Purpose PLL. The logical layout of the PLL with internal feedback is |
| * approximately the following: |
| * |
| *        +---------------+ |
| *        |reference clock| |
| *        +---------------+ |
| *                | |
| *                v |
| *              +--+ |
| *              |/r| |
| *              +--+ |
| *                | |
| *                v |
| *         +-------------+ |
| *         |divided clock| |
| *         +-------------+ |
| *                | |
| *                v |
| *        +--------------+ |
| *        |phase detector|<---+ |
| *        +--------------+    | |
| *                |           | |
| *                v   +--------------+ |
| *              +---+ |feedback clock| |
| *              |VCO| +--------------+ |
| *              +---+         ^ |
| *                |    +--+   | |
| *                +--->|/f|---+ |
| *                |    +--+ |
| *                v |
| *              +---+ |
| *              |/od| |
| *              +---+ |
| *                | |
| *                v |
| *             +------+ |
| *             |output| |
| *             +------+ |
| * |
| * The k210 PLLs have three factors: r, f, and od. Because of the feedback mode, |
| * the effect of the division by f is to multiply the input frequency. The |
| * equation for the output rate is |
| * rate = (rate_in * f) / (r * od). |
| * Moving knowns to one side of the equation, we get |
| * rate / rate_in = f / (r * od) |
| * Rearranging slightly, |
| * abs_error = abs((rate / rate_in) - (f / (r * od))). |
| * To get the relative error, we divide by the expected ratio |
| * error = abs((rate / rate_in) - (f / (r * od))) / (rate / rate_in). |
| * Simplifying, |
| * error = abs(1 - (f / (r * od)) / (rate / rate_in)) |
| * error = abs(1 - (f * rate_in) / (r * od * rate)) |
| * Using the constants ratio = rate / rate_in and inv_ratio = rate_in / rate, |
| * error = abs((f * inv_ratio) / (r * od) - 1) |
| * This is the error used in evaluating parameters. |
| * |
| * r and od are four bits each, while f is six bits. Because r and od are |
| * multiplied together, instead of the full 256 values possible if both fields |
| * were used fully, there are only 97 distinct products. Combined with f, there |
| * are 6208 theoretical settings for the PLL. However, most of these settings |
| * can be ruled out immediately because they do not have the correct ratio. |
| * |
| * In addition to the constraint of approximating the desired ratio, parameters |
| * must also keep internal pll frequencies within acceptable ranges. The divided |
| * clock's minimum and maximum frequencies have a ratio of around 128. This |
| * leaves fairly substantial room to work with, especially since the only |
| * affected parameter is r. The VCO's minimum and maximum frequency have a ratio |
| * of 5, which is considerably more restrictive. |
| * |
| * The r and od factors are stored in a table. This is to make it easy to find |
| * the next-largest product. Some products have multiple factorizations, but |
| * only when one factor has at least a 2.5x ratio to the factors of the other |
| * factorization. This is because any smaller ratio would not make a difference |
| * when ensuring the VCO's frequency is within spec. |
| * |
| * Throughout the calculation function, fixed point arithmetic is used. Because |
| * the range of rate and rate_in may be up to 1.75 GHz, or around 2^30, 64-bit |
| * 32.32 fixed-point numbers are used to represent ratios. In general, to |
| * implement division, the numerator is first multiplied by 2^32. This gives a |
| * result where the whole number part is in the upper 32 bits, and the fraction |
| * is in the lower 32 bits. |
| * |
| * In general, rounding is done to the closest integer. This helps find the best |
| * approximation for the ratio. Rounding in one direction (e.g down) could cause |
| * the function to miss a better ratio with one of the parameters increased by |
| * one. |
| */ |
| |
| /* |
| * The factors table was generated with the following python code: |
| * |
| * def p(x, y): |
| *     return (1.0*x/y > 2.5) or (1.0*y/x > 2.5) |
| * |
| * factors = {} |
| * for i in range(1, 17): |
| *     for j in range(1, 17): |
| *         fs = factors.get(i*j) or [] |
| *         if fs == [] or all([ |
| *                 (p(i, x) and p(i, y)) or (p(j, x) and p(j, y)) |
| *                 for (x, y) in fs]): |
| *             fs.append((i, j)) |
| *             factors[i*j] = fs |
| * |
| * for k, l in sorted(factors.items()): |
| *     for v in l: |
| *         print("PACK(%s, %s)," % v) |
| * |
| * Each entry packs one (r, od) pair into a single byte: r - 1 in the upper |
| * four bits and od - 1 in the lower four bits. UNPACK_R() and UNPACK_OD() |
| * recover the original values, which range from 1 to 16. Entries are listed |
| * in order of non-decreasing product r * od, so walking the table forwards |
| * visits the products from smallest to largest. |
| */ |
| #define PACK(r, od) (((((r) - 1) & 0xF) << 4) | (((od) - 1) & 0xF)) |
| #define UNPACK_R(val) ((((val) >> 4) & 0xF) + 1) |
| #define UNPACK_OD(val) (((val) & 0xF) + 1) |
| static const u8 factors[] = { |
| PACK(1, 1), |
| PACK(1, 2), |
| PACK(1, 3), |
| PACK(1, 4), |
| PACK(1, 5), |
| PACK(1, 6), |
| PACK(1, 7), |
| PACK(1, 8), |
| PACK(1, 9), |
| PACK(3, 3), |
| PACK(1, 10), |
| PACK(1, 11), |
| PACK(1, 12), |
| PACK(3, 4), |
| PACK(1, 13), |
| PACK(1, 14), |
| PACK(1, 15), |
| PACK(3, 5), |
| PACK(1, 16), |
| PACK(4, 4), |
| PACK(2, 9), |
| PACK(2, 10), |
| PACK(3, 7), |
| PACK(2, 11), |
| PACK(2, 12), |
| PACK(5, 5), |
| PACK(2, 13), |
| PACK(3, 9), |
| PACK(2, 14), |
| PACK(2, 15), |
| PACK(2, 16), |
| PACK(3, 11), |
| PACK(5, 7), |
| PACK(3, 12), |
| PACK(3, 13), |
| PACK(4, 10), |
| PACK(3, 14), |
| PACK(4, 11), |
| PACK(3, 15), |
| PACK(3, 16), |
| PACK(7, 7), |
| PACK(5, 10), |
| PACK(4, 13), |
| PACK(6, 9), |
| PACK(5, 11), |
| PACK(4, 14), |
| PACK(4, 15), |
| PACK(7, 9), |
| PACK(4, 16), |
| PACK(5, 13), |
| PACK(6, 11), |
| PACK(5, 14), |
| PACK(6, 12), |
| PACK(5, 15), |
| PACK(7, 11), |
| PACK(6, 13), |
| PACK(5, 16), |
| PACK(9, 9), |
| PACK(6, 14), |
| PACK(8, 11), |
| PACK(6, 15), |
| PACK(7, 13), |
| PACK(6, 16), |
| PACK(7, 14), |
| PACK(9, 11), |
| PACK(10, 10), |
| PACK(8, 13), |
| PACK(7, 15), |
| PACK(9, 12), |
| PACK(10, 11), |
| PACK(7, 16), |
| PACK(9, 13), |
| PACK(8, 15), |
| PACK(11, 11), |
| PACK(9, 14), |
| PACK(8, 16), |
| PACK(10, 13), |
| PACK(11, 12), |
| PACK(9, 15), |
| PACK(10, 14), |
| PACK(11, 13), |
| PACK(9, 16), |
| PACK(10, 15), |
| PACK(11, 14), |
| PACK(12, 13), |
| PACK(10, 16), |
| PACK(11, 15), |
| PACK(12, 14), |
| PACK(13, 13), |
| PACK(11, 16), |
| PACK(12, 15), |
| PACK(13, 14), |
| PACK(12, 16), |
| PACK(13, 15), |
| PACK(14, 14), |
| PACK(13, 16), |
| PACK(14, 15), |
| PACK(14, 16), |
| PACK(15, 15), |
| PACK(15, 16), |
| PACK(16, 16), |
| }; |
| /* |
| * k210_pll_calc_config() - Calculate the best r, f, and od for a target rate |
| * @rate: The desired output rate, in Hz |
| * @rate_in: The rate of the PLL's input (parent) clock, in Hz |
| * @best: Where to store the r, f, and od with the lowest relative error |
| * |
| * Searches for the factors which make rate_in * f / (r * od) closest to @rate |
| * while keeping r no larger than max_r and the VCO frequency |
| * (rate_in * f / r) within its limits. When multiplying (rate > rate_in), each |
| * distinct r * od product in the factors table is tried and f is derived from |
| * it; otherwise each f is tried in turn and the closest r * od product is |
| * looked up in the table. The search stops early on an exact match. |
| * |
| * @best is only written when an in-spec candidate improves on the best error |
| * found so far; it is left untouched when the function fails. |
| * |
| * Return: 0 on success, or -EINVAL if either rate is out of range, the ratio |
| * between them cannot be achieved, or no in-spec configuration was found |
| */ |
| TEST_STATIC int k210_pll_calc_config(u32 rate, u32 rate_in, |
| struct k210_pll_config *best) |
| { |
| int i; /* index into factors[] */ |
| s64 error, best_error; |
| u64 ratio, inv_ratio; /* fixed point 32.32 ratio of the rates */ |
| u64 max_r; |
| u64 r, f, od; |
| |
| /* |
| * Can't go over 1.75 GHz or under 21.25 MHz due to limitations on the |
| * VCO frequency. These are not the same limits as below because od can |
| * reduce the output frequency by 16. |
| */ |
| if (rate > 1750000000 || rate < 21250000) |
| return -EINVAL; |
| |
| /* Similar restrictions on the input rate */ |
| if (rate_in > 1750000000 || rate_in < 13300000) |
| return -EINVAL; |
| |
| ratio = DIV_ROUND_CLOSEST_ULL((u64)rate << 32, rate_in); /* rate / rate_in */ |
| inv_ratio = DIV_ROUND_CLOSEST_ULL((u64)rate_in << 32, rate); /* rate_in / rate */ |
| /* Can't increase by more than 64 or reduce by more than 256 */ |
| if (rate > rate_in && ratio > (64ULL << 32)) |
| return -EINVAL; |
| else if (rate <= rate_in && inv_ratio > (256ULL << 32)) |
| return -EINVAL; |
| |
| /* |
| * The divided clock (rate_in / r) must stay between 1.75 GHz and 13.3 |
| * MHz. Only a maximum for r is calculated. There is no minimum for r, |
| * since the only way to get a higher input clock than 26 MHz is to use |
| * a clock generated by a PLL. Because PLLs cannot output frequencies |
| * greater than 1.75 GHz, the minimum would never be greater than one. |
| */ |
| max_r = DIV_ROUND_DOWN_ULL(rate_in, 13300000); |
| |
| /* Variables get immediately incremented, so start at -1th iteration */ |
| i = -1; |
| f = 0; |
| r = 0; |
| od = 0; |
| best_error = S64_MAX; |
| error = best_error; |
| /* |
| * do-while here so we always try at least one ratio. The search ends |
| * once f reaches 64, the end of the factors table is reached, or an |
| * exact match (zero error) has been found. |
| */ |
| do { |
| /* |
| * Whether we swapped r and od while enforcing frequency limits |
| */ |
| bool swapped = false; |
| u64 last_od = od; |
| u64 last_r = r; |
| |
| /* |
| * Try the next largest value for f (or r and od) and |
| * recalculate the other parameters based on that |
| */ |
| if (rate > rate_in) { |
| /* |
| * Skip factors of the same product if we already tried |
| * out that product |
| */ |
| do { |
| i++; |
| r = UNPACK_R(factors[i]); |
| od = UNPACK_OD(factors[i]); |
| } while (i + 1 < ARRAY_SIZE(factors) && |
| r * od == last_r * last_od); |
| |
| /* Round close: f = round(r * od * ratio), limited to at most 64 */ |
| f = (r * od * ratio + BIT(31)) >> 32; |
| if (f > 64) |
| f = 64; |
| } else { |
| u64 tmp = ++f * inv_ratio; /* ideal r * od for this f, in 32.32 */ |
| bool round_up = !!(tmp & BIT(31)); /* fractional part is at least 0.5 */ |
| u32 goal = (tmp >> 32) + round_up; /* ideal r * od, rounded to nearest */ |
| u32 err, last_err; |
| |
| /* Get the next r/od pair in factors */ |
| while (r * od < goal && i + 1 < ARRAY_SIZE(factors)) { |
| i++; |
| r = UNPACK_R(factors[i]); |
| od = UNPACK_OD(factors[i]); |
| } |
| |
| /* |
| * This is a case of double rounding. If we rounded up |
| * above, we need to round down (in cases of ties) here. |
| * This prevents off-by-one errors resulting from |
| * choosing X+2 over X when X.Y rounds up to X+1 and |
| * there is no r * od = X+1. For the converse, when X.Y |
| * is rounded down to X, we should choose X+1 over X-1. |
| */ |
| err = abs(r * od - goal); |
| last_err = abs(last_r * last_od - goal); |
| if (last_err < err || (round_up && last_err == err)) { |
| i--; |
| r = last_r; |
| od = last_od; |
| } |
| } |
| |
| /* |
| * Enforce limits on internal clock frequencies. If we |
| * aren't in spec, try swapping r and od. If everything is |
| * in-spec, calculate the relative error. |
| */ |
| while (true) { |
| /* |
| * Whether the intermediate frequencies are out-of-spec |
| */ |
| bool out_of_spec = false; |
| |
| if (r > max_r) { |
| out_of_spec = true; |
| } else { |
| /* |
| * There is no way to only divide once; we need |
| * to examine the frequency with and without the |
| * effect of od. |
| */ |
| u64 vco = DIV_ROUND_CLOSEST_ULL(rate_in * f, r); |
| |
| if (vco > 1750000000 || vco < 340000000) /* VCO: 340 MHz to 1.75 GHz */ |
| out_of_spec = true; |
| } |
| |
| if (out_of_spec) { |
| if (!swapped) { |
| u64 tmp = r; |
| |
| r = od; |
| od = tmp; |
| swapped = true; |
| continue; |
| } else { |
| /* |
| * Try looking ahead to see if there are |
| * additional factors for the same |
| * product. |
| */ |
| if (i + 1 < ARRAY_SIZE(factors)) { |
| u64 new_r, new_od; |
| |
| i++; |
| new_r = UNPACK_R(factors[i]); |
| new_od = UNPACK_OD(factors[i]); |
| if (r * od == new_r * new_od) { |
| r = new_r; |
| od = new_od; |
| swapped = false; |
| continue; |
| } |
| i--; |
| } |
| break; |
| } |
| } |
| |
| error = DIV_ROUND_CLOSEST_ULL(f * inv_ratio, r * od); /* 32.32; 1.0 is exact */ |
| /* The lower 16 bits are spurious */ |
| error = abs((error - BIT(32))) >> 16; |
| |
| if (error < best_error) { |
| best->r = r; |
| best->f = f; |
| best->od = od; |
| best_error = error; |
| } |
| break; |
| } |
| } while (f < 64 && i + 1 < ARRAY_SIZE(factors) && error != 0); |
| |
| if (best_error == S64_MAX) /* no in-spec candidate was ever recorded */ |
| return -EINVAL; |
| |
| log_debug("best error %lld\n", best_error); |
| return 0; |
| } |
| |
| static ulong k210_pll_set_rate(struct clk *clk, ulong rate) |
| { |
| int err; |
| long long rate_in = clk_get_parent_rate(clk); |
| struct k210_pll_config config = {}; |
| struct k210_pll *pll = to_k210_pll(clk); |
| u32 reg; |
| |
| if (rate_in < 0) |
| return rate_in; |
| |
| log_debug("Calculating parameters with rate=%lu and rate_in=%lld\n", |
| rate, rate_in); |
| err = k210_pll_calc_config(rate, rate_in, &config); |
| if (err) |
| return err; |
| log_debug("Got r=%u f=%u od=%u\n", config.r, config.f, config.od); |
| |
| /* |
| * Don't use clk_disable as it might not actually disable the pll due to |
| * refcounting |
| */ |
| k210_pll_disable(clk); |
| |
| reg = readl(pll->reg); |
| reg &= ~K210_PLL_CLKR |
| & ~K210_PLL_CLKF |
| & ~K210_PLL_CLKOD |
| & ~K210_PLL_BWADJ; |
| reg |= FIELD_PREP(K210_PLL_CLKR, config.r - 1) |
| | FIELD_PREP(K210_PLL_CLKF, config.f - 1) |
| | FIELD_PREP(K210_PLL_CLKOD, config.od - 1) |
| | FIELD_PREP(K210_PLL_BWADJ, config.f - 1); |
| writel(reg, pll->reg); |
| |
| err = k210_pll_enable(clk); |
| if (err) |
| return err; |
| |
| serial_setbrg(); |
| return clk_get_rate(clk); |
| } |
| #endif /* CONFIG_CLK_K210_SET_RATE */ |
| |
| static ulong k210_pll_get_rate(struct clk *clk) |
| { |
| long long rate_in = clk_get_parent_rate(clk); |
| struct k210_pll *pll = to_k210_pll(clk); |
| u64 r, f, od; |
| u32 reg = readl(pll->reg); |
| |
| if (rate_in < 0 || (reg & K210_PLL_BYPASS)) |
| return rate_in; |
| |
| if (!(reg & K210_PLL_PWRD)) |
| return 0; |
| |
| r = FIELD_GET(K210_PLL_CLKR, reg) + 1; |
| f = FIELD_GET(K210_PLL_CLKF, reg) + 1; |
| od = FIELD_GET(K210_PLL_CLKOD, reg) + 1; |
| |
| return DIV_ROUND_DOWN_ULL(((u64)rate_in) * f, r * od); |
| } |
| |
| /* |
| * Wait for the PLL to be locked. If the PLL is not locked, try clearing the |
| * slip before retrying |
| */ |
| static void k210_pll_waitfor_lock(struct k210_pll *pll) |
| { |
| u32 mask = GENMASK(pll->width - 1, 0) << pll->shift; |
| |
| while (true) { |
| u32 reg = readl(pll->lock); |
| |
| if ((reg & mask) == mask) |
| break; |
| |
| reg |= BIT(pll->shift + K210_PLL_CLEAR_SLIP); |
| writel(reg, pll->lock); |
| } |
| } |
| |
| /* Adapted from sysctl_pll_enable */ |
| static int k210_pll_enable(struct clk *clk) |
| { |
| struct k210_pll *pll = to_k210_pll(clk); |
| u32 reg = readl(pll->reg); |
| |
| if ((reg | K210_PLL_PWRD) && !(reg | K210_PLL_RESET)) |
| return 0; |
| |
| reg |= K210_PLL_PWRD; |
| writel(reg, pll->reg); |
| |
| /* Ensure reset is low before asserting it */ |
| reg &= ~K210_PLL_RESET; |
| writel(reg, pll->reg); |
| reg |= K210_PLL_RESET; |
| writel(reg, pll->reg); |
| nop(); |
| nop(); |
| reg &= ~K210_PLL_RESET; |
| writel(reg, pll->reg); |
| |
| k210_pll_waitfor_lock(pll); |
| |
| reg &= ~K210_PLL_BYPASS; |
| writel(reg, pll->reg); |
| |
| return 0; |
| } |
| |
| static int k210_pll_disable(struct clk *clk) |
| { |
| struct k210_pll *pll = to_k210_pll(clk); |
| u32 reg = readl(pll->reg); |
| |
| /* |
| * Bypassing before powering off is important so child clocks don't stop |
| * working. This is especially important for pll0, the indirect parent |
| * of the cpu clock. |
| */ |
| reg |= K210_PLL_BYPASS; |
| writel(reg, pll->reg); |
| |
| reg &= ~K210_PLL_PWRD; |
| writel(reg, pll->reg); |
| return 0; |
| } |
| /* |
| * Clock operations for the K210 PLL. Setting the rate is only available when |
| * CONFIG_CLK_K210_SET_RATE is enabled. |
| */ |
| const struct clk_ops k210_pll_ops = { |
| .get_rate = k210_pll_get_rate, |
| #ifdef CONFIG_CLK_K210_SET_RATE |
| .set_rate = k210_pll_set_rate, |
| #endif |
| .enable = k210_pll_enable, |
| .disable = k210_pll_disable, |
| }; |
| |
| struct clk *k210_register_pll_struct(const char *name, const char *parent_name, |
| struct k210_pll *pll) |
| { |
| int ret; |
| struct clk *clk = &pll->clk; |
| |
| ret = clk_register(clk, CLK_K210_PLL, name, parent_name); |
| if (ret) |
| return ERR_PTR(ret); |
| return clk; |
| } |
| |
| struct clk *k210_register_pll(const char *name, const char *parent_name, |
| void __iomem *reg, void __iomem *lock, u8 shift, |
| u8 width) |
| { |
| struct clk *clk; |
| struct k210_pll *pll; |
| |
| pll = kzalloc(sizeof(*pll), GFP_KERNEL); |
| if (!pll) |
| return ERR_PTR(-ENOMEM); |
| pll->reg = reg; |
| pll->lock = lock; |
| pll->shift = shift; |
| pll->width = width; |
| |
| clk = k210_register_pll_struct(name, parent_name, pll); |
| if (IS_ERR(clk)) |
| kfree(pll); |
| return clk; |
| } |
| /* Clock-uclass driver entry; PLLs are bound to it by name in clk_register() */ |
| U_BOOT_DRIVER(k210_pll) = { |
| .name = CLK_K210_PLL, |
| .id = UCLASS_CLK, |
| .ops = &k210_pll_ops, |
| }; |