+ IMustLock(dir);
+
+ name = &dentry->d_name;
+ path.dentry = vfsub_lookup_one_len(name->name, parent, name->len);
+ if (IS_ERR(path.dentry)) {
+ file = (void *)path.dentry;
+ pr_err("%pd lookup err %ld\n", dentry, PTR_ERR(path.dentry));
+ goto out;
+ }
+
+ /* no need to mnt_want_write() since we call dentry_open() later */
+ err = vfs_create(dir, path.dentry, 0666, NULL);
+ if (unlikely(err)) {
+ file = ERR_PTR(err);
+ pr_err("%pd create err %d\n", dentry, err);
+ goto out_dput;
+ }
+
+ path.mnt = base->mnt;
+ file = vfsub_dentry_open(&path,
+ O_RDWR | O_CREAT | O_EXCL | O_LARGEFILE
+ /* | __FMODE_NONOTIFY */);
+ if (IS_ERR(file)) {
+ pr_err("%pd open err %ld\n", dentry, PTR_ERR(file));
+ goto out_dput;
+ }
+
+ delegated = NULL;
+ err = vfsub_unlink(dir, &file->f_path, &delegated, /*force*/0);
+ au_xino_unlock_dir(&ldir);
+ do_unlock = 0;
+ if (unlikely(err == -EWOULDBLOCK)) {
+ pr_warn("cannot retry for NFSv4 delegation"
+ " for an internal unlink\n");
+ iput(delegated);
+ }
+ if (unlikely(err)) {
+ pr_err("%pd unlink err %d\n", dentry, err);
+ goto out_fput;
+ }
+
+ if (copy_src) {
+ /* no one can touch copy_src xino */
+ err = au_copy_file(file, copy_src, vfsub_f_size_read(copy_src));
+ if (unlikely(err)) {
+ pr_err("%pd copy err %d\n", dentry, err);
+ goto out_fput;
+ }
+ }
+ goto out_dput; /* success */
+
+out_fput:
+ fput(file);
+ file = ERR_PTR(err);
+out_dput:
+ dput(path.dentry);
+out:
+ if (do_unlock)
+ au_xino_unlock_dir(&ldir);
+ return file;
+}
+
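+/*
+ * return the first file which is already set in the xi_file array,
+ * or NULL if none exists.
+ */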
+struct file *au_xino_file1(struct au_xino *xi)
+{
+ struct file *file;
+ unsigned int u, nfile;
+
+ file = NULL;
+ nfile = xi->xi_nfile;
+ for (u = 0; u < nfile; u++) {
+ file = xi->xi_file[u];
+ if (file)
+ break;
+ }
+
+ return file;
+}
+
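+/*
+ * store @file at @idx in the xi_file array, growing the array when @idx
+ * is beyond its current size.  an existing entry at @idx is released.
+ */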
+static int au_xino_file_set(struct au_xino *xi, int idx, struct file *file)
+{
+ int err;
+ struct file *f;
+ void *p;
+
+ if (file)
+ get_file(file);
+
+ err = 0;
+ f = NULL;
+ if (idx < xi->xi_nfile) {
+ f = xi->xi_file[idx];
+ if (f)
+ fput(f);
+ } else {
+ p = au_kzrealloc(xi->xi_file,
+ sizeof(*xi->xi_file) * xi->xi_nfile,
+ sizeof(*xi->xi_file) * (idx + 1),
+ GFP_NOFS, /*may_shrink*/0);
+ if (p) {
+ MtxMustLock(&xi->xi_mtx);
+ xi->xi_file = p;
+ xi->xi_nfile = idx + 1;
+ } else {
+ err = -ENOMEM;
+ if (file)
+ fput(file);
+ goto out;
+ }
+ }
+ xi->xi_file[idx] = file;
+
+out:
+ return err;
+}
+
+/*
+ * create a new xino file and register it.  if @xinew->xi is not set,
+ * the file is a xigen file, which is not supported yet (see the todo
+ * below).
+ */
+struct file *au_xi_new(struct super_block *sb, struct au_xi_new *xinew)
+{
+ struct file *file;
+ int err;
+
+ SiMustAnyLock(sb);
+
+ file = au_xino_create2(sb, xinew->base, xinew->copy_src);
+ if (IS_ERR(file)) {
+ err = PTR_ERR(file);
+ pr_err("%s[%d], err %d\n",
+ xinew->xi ? "xino" : "xigen",
+ xinew->idx, err);
+ goto out;
+ }
+
+ if (xinew->xi)
+ err = au_xino_file_set(xinew->xi, xinew->idx, file);
+ else {
+ BUG();
+ /* todo: make xigen file an array */
+ /* err = au_xigen_file_set(sb, xinew->idx, file); */
+ }
+ fput(file);
+ if (unlikely(err))
+ file = ERR_PTR(err);
+
+out:
+ return file;
+}
+
+/* ---------------------------------------------------------------------- */
+
+/*
+ * truncate xino files
+ */
+static int au_xino_do_trunc(struct super_block *sb, aufs_bindex_t bindex,
+ int idx, struct kstatfs *st)
+{
+ int err;
+ blkcnt_t blocks;
+ struct file *file, *new_xino;
+ struct au_xi_new xinew = {
+ .idx = idx
+ };
+
+ err = 0;
+ xinew.xi = au_sbr(sb, bindex)->br_xino;
+ file = au_xino_file(xinew.xi, idx);
+ if (!file)
+ goto out;
+
+ xinew.base = &file->f_path;
+ err = vfs_statfs(xinew.base, st);
+ if (unlikely(err)) {
+ AuErr1("statfs err %d, ignored\n", err);
+ err = 0;
+ goto out;
+ }
+
+ blocks = file_inode(file)->i_blocks;
+ pr_info("begin truncating xino(b%d-%d), ib%llu, %llu/%llu free blks\n",
+ bindex, idx, (u64)blocks, st->f_bfree, st->f_blocks);
+
+ xinew.copy_src = file;
+ new_xino = au_xi_new(sb, &xinew);
+ if (IS_ERR(new_xino)) {
+ err = PTR_ERR(new_xino);
+ pr_err("xino(b%d-%d), err %d, ignored\n", bindex, idx, err);
+ goto out;
+ }
+
+ err = vfs_statfs(&new_xino->f_path, st);
+ if (!err)
+ pr_info("end truncating xino(b%d-%d), ib%llu, %llu/%llu free blks\n",
+ bindex, idx, (u64)file_inode(new_xino)->i_blocks,
+ st->f_bfree, st->f_blocks);
+ else {
+ AuErr1("statfs err %d, ignored\n", err);
+ err = 0;
+ }
+
+out:
+ return err;
+}
+
+int au_xino_trunc(struct super_block *sb, aufs_bindex_t bindex, int idx_begin)
+{
+ int err, i;
+ unsigned long jiffy;
+ aufs_bindex_t bbot;
+ struct kstatfs *st;
+ struct au_branch *br;
+ struct au_xino *xi;
+
+ err = -ENOMEM;
+ st = kmalloc(sizeof(*st), GFP_NOFS);
+ if (unlikely(!st))
+ goto out;
+
+ err = -EINVAL;
+ bbot = au_sbbot(sb);
+ if (unlikely(bindex < 0 || bbot < bindex))
+ goto out_st;
+
+ err = 0;
+ jiffy = jiffies;
+ br = au_sbr(sb, bindex);
+ xi = br->br_xino;
+ for (i = idx_begin; !err && i < xi->xi_nfile; i++)
+ err = au_xino_do_trunc(sb, bindex, i, st);
+ if (!err)
+ au_sbi(sb)->si_xino_jiffy = jiffy;
+
+out_st:
+ au_kfree_rcu(st);
+out:
+ return err;
+}
+
+struct xino_do_trunc_args {
+ struct super_block *sb;
+ struct au_branch *br;
+ int idx;
+};
+
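+/* the work handler for the asynchronous truncation */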
+static void xino_do_trunc(void *_args)
+{
+ struct xino_do_trunc_args *args = _args;
+ struct super_block *sb;
+ struct au_branch *br;
+ struct inode *dir;
+ int err, idx;
+ aufs_bindex_t bindex;
+
+ err = 0;
+ sb = args->sb;
+ dir = d_inode(sb->s_root);
+ br = args->br;
+ idx = args->idx;
+
+ si_noflush_write_lock(sb);
+ ii_read_lock_parent(dir);
+ bindex = au_br_index(sb, br->br_id);
+ err = au_xino_trunc(sb, bindex, idx);
+ ii_read_unlock(dir);
+ if (unlikely(err))
+ pr_warn("err b%d, (%d)\n", bindex, err);
+ atomic_dec(&br->br_xino->xi_truncating);
+ au_lcnt_dec(&br->br_count);
+ si_write_unlock(sb);
+ au_nwt_done(&au_sbi(sb)->si_nowait);
+ au_kfree_rcu(args);
+}
+
+/*
+ * returns the index in the xi_file array whose corresponding file needs
+ * truncating, or -1 if no truncation is necessary.
+ */
+static int xino_trunc_test(struct super_block *sb, struct au_branch *br)
+{
+ int err;
+ unsigned int u;
+ struct kstatfs st;
+ struct au_sbinfo *sbinfo;
+ struct au_xino *xi;
+ struct file *file;
+
+ /* todo: si_xino_expire and the ratio should be customizable */
+ sbinfo = au_sbi(sb);
+ if (time_before(jiffies,
+ sbinfo->si_xino_jiffy + sbinfo->si_xino_expire))
+ return -1;
+
+ /* truncation border */
+ xi = br->br_xino;
+ for (u = 0; u < xi->xi_nfile; u++) {
+ file = au_xino_file(xi, u);
+ if (!file)
+ continue;
+
+ err = vfs_statfs(&file->f_path, &st);
+ if (unlikely(err)) {
+ AuErr1("statfs err %d, ignored\n", err);
+ return -1;
+ }
+ if (div64_u64(st.f_bfree * 100, st.f_blocks)
+ >= AUFS_XINO_DEF_TRUNC)
+ return u;
+ }
+
+ return -1;
+}
+
+static void xino_try_trunc(struct super_block *sb, struct au_branch *br)
+{
+ int idx;
+ struct xino_do_trunc_args *args;
+ int wkq_err;
+
+ idx = xino_trunc_test(sb, br);
+ if (idx < 0)
+ return;
+
+ if (atomic_inc_return(&br->br_xino->xi_truncating) > 1)
+ goto out;
+
+ /* lock and kfree() will be called in xino_do_trunc() */
+ args = kmalloc(sizeof(*args), GFP_NOFS);
+ if (unlikely(!args)) {
+ AuErr1("no memory\n");
+ goto out;
+ }
+
+ au_lcnt_inc(&br->br_count);
+ args->sb = sb;
+ args->br = br;
+ args->idx = idx;
+ wkq_err = au_wkq_nowait(xino_do_trunc, args, sb, /*flags*/0);
+ if (!wkq_err)
+ return; /* success */
+
+ pr_err("wkq %d\n", wkq_err);
+ au_lcnt_dec(&br->br_count);
+ au_kfree_rcu(args);
+
+out:
+ atomic_dec(&br->br_xino->xi_truncating);
+}
+
+/* ---------------------------------------------------------------------- */
+
+struct au_xi_calc {
+ int idx;
+ loff_t pos;
+};
+
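+/*
+ * convert @h_ino into the location of its entry in the xino file array:
+ * with 'maxent' entries per file, @calc->idx becomes h_ino / maxent and
+ * @calc->pos becomes (h_ino % maxent) * sizeof(ino_t).
+ */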
+static void au_xi_calc(struct super_block *sb, ino_t h_ino,
+ struct au_xi_calc *calc)
+{
+ loff_t maxent;
+
+ maxent = au_xi_maxent(sb);
+ calc->idx = div64_u64_rem(h_ino, maxent, &calc->pos);
+ calc->pos *= sizeof(ino_t);
+}
+
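+/*
+ * create the xino file for @calc->idx unless it already exists, using
+ * an already-opened xino file on the same branch as the base path.
+ */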
+static int au_xino_do_new_async(struct super_block *sb, struct au_branch *br,
+ struct au_xi_calc *calc)
+{
+ int err;
+ struct file *file;
+ struct au_xino *xi = br->br_xino;
+ struct au_xi_new xinew = {
+ .xi = xi
+ };
+
+ SiMustAnyLock(sb);
+
+ err = 0;
+ if (!xi)
+ goto out;
+
+ mutex_lock(&xi->xi_mtx);
+ file = au_xino_file(xi, calc->idx);
+ if (file)
+ goto out_mtx;
+
+ file = au_xino_file(xi, /*idx*/-1);
+ AuDebugOn(!file);
+ xinew.idx = calc->idx;
+ xinew.base = &file->f_path;
+ /* xinew.copy_src = NULL; */
+ file = au_xi_new(sb, &xinew);
+ if (IS_ERR(file))
+ err = PTR_ERR(file);
+
+out_mtx:
+ mutex_unlock(&xi->xi_mtx);
+out:
+ return err;
+}
+
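+/* arguments for creating a new xino file asynchronously */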
+struct au_xino_do_new_async_args {
+ struct super_block *sb;
+ struct au_branch *br;
+ struct au_xi_calc calc;
+ ino_t ino;
+};
+
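+/*
+ * an inum pair whose xino file is still being created; kept on the
+ * xi_writing list so that au_xino_read() can find the pair before the
+ * new file is ready.
+ */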
+struct au_xi_writing {
+ struct hlist_bl_node node;
+ ino_t h_ino, ino;
+};
+
+static int au_xino_do_write(vfs_writef_t write, struct file *file,
+ struct au_xi_calc *calc, ino_t ino);
+
+static void au_xino_call_do_new_async(void *args)
+{
+ struct au_xino_do_new_async_args *a = args;
+ struct au_branch *br;
+ struct super_block *sb;
+ struct au_sbinfo *sbi;
+ struct inode *root;
+ struct file *file;
+ struct au_xi_writing *del, *p;
+ struct hlist_bl_head *hbl;
+ struct hlist_bl_node *pos;
+ int err;
+
+ br = a->br;
+ sb = a->sb;
+ sbi = au_sbi(sb);
+ si_noflush_read_lock(sb);
+ root = d_inode(sb->s_root);
+ ii_read_lock_child(root);
+ err = au_xino_do_new_async(sb, br, &a->calc);
+ if (unlikely(err)) {
+ AuIOErr("err %d\n", err);
+ goto out;
+ }
+
+ file = au_xino_file(br->br_xino, a->calc.idx);
+ AuDebugOn(!file);
+ err = au_xino_do_write(sbi->si_xwrite, file, &a->calc, a->ino);
+ if (unlikely(err)) {
+ AuIOErr("err %d\n", err);
+ goto out;
+ }
+
+ del = NULL;
+ hbl = &br->br_xino->xi_writing;
+ hlist_bl_lock(hbl);
+ au_hbl_for_each(pos, hbl) {
+ p = container_of(pos, struct au_xi_writing, node);
+ if (p->ino == a->ino) {
+ del = p;
+ hlist_bl_del(&p->node);
+ break;
+ }
+ }
+ hlist_bl_unlock(hbl);
+ au_kfree_rcu(del);
+
+out:
+ au_lcnt_dec(&br->br_count);
+ ii_read_unlock(root);
+ si_read_unlock(sb);
+ au_nwt_done(&sbi->si_nowait);
+ au_kfree_rcu(a);
+}
+
+/*
+ * create a new xino file asynchronously
+ */
+static int au_xino_new_async(struct super_block *sb, struct au_branch *br,
+ struct au_xi_calc *calc, ino_t ino)
+{
+ int err;
+ struct au_xino_do_new_async_args *arg;
+
+ err = -ENOMEM;
+ arg = kmalloc(sizeof(*arg), GFP_NOFS);
+ if (unlikely(!arg))
+ goto out;
+
+ arg->sb = sb;
+ arg->br = br;
+ arg->calc = *calc;
+ arg->ino = ino;
+ au_lcnt_inc(&br->br_count);
+ err = au_wkq_nowait(au_xino_call_do_new_async, arg, sb, AuWkq_NEST);
+ if (unlikely(err)) {
+ pr_err("wkq %d\n", err);
+ au_lcnt_dec(&br->br_count);
+ au_kfree_rcu(arg);
+ }
+
+out:
+ return err;
+}
+
+/*
+ * read @ino from the xino file for the specified branch{@sb, @bindex}
+ * at the position of @h_ino.
+ */
+int au_xino_read(struct super_block *sb, aufs_bindex_t bindex, ino_t h_ino,
+ ino_t *ino)
+{
+ int err;
+ ssize_t sz;
+ struct au_xi_calc calc;
+ struct au_sbinfo *sbinfo;
+ struct file *file;
+ struct au_xino *xi;
+ struct hlist_bl_head *hbl;
+ struct hlist_bl_node *pos;
+ struct au_xi_writing *p;
+
+ *ino = 0;
+ if (!au_opt_test(au_mntflags(sb), XINO))
+ return 0; /* no xino */
+
+ err = 0;
+ au_xi_calc(sb, h_ino, &calc);
+ xi = au_sbr(sb, bindex)->br_xino;
+ file = au_xino_file(xi, calc.idx);
+ if (!file) {
+ hbl = &xi->xi_writing;
+ hlist_bl_lock(hbl);
+ au_hbl_for_each(pos, hbl) {
+ p = container_of(pos, struct au_xi_writing, node);
+ if (p->h_ino == h_ino) {
+ AuDbg("hi%llu, i%llu, found\n",
+ (u64)p->h_ino, (u64)p->ino);
+ *ino = p->ino;
+ break;
+ }
+ }
+ hlist_bl_unlock(hbl);
+ return 0;
+ } else if (vfsub_f_size_read(file) < calc.pos + sizeof(*ino))
+ return 0; /* no xino */
+
+ sbinfo = au_sbi(sb);
+ sz = xino_fread(sbinfo->si_xread, file, ino, sizeof(*ino), &calc.pos);
+ if (sz == sizeof(*ino))
+ return 0; /* success */
+
+ err = sz;
+ if (unlikely(sz >= 0)) {
+ err = -EIO;
+ AuIOErr("xino read error (%zd)\n", sz);
+ }
+ return err;
+}
+
+static int au_xino_do_write(vfs_writef_t write, struct file *file,
+ struct au_xi_calc *calc, ino_t ino)
+{
+ ssize_t sz;
+
+ sz = xino_fwrite(write, file, &ino, sizeof(ino), &calc->pos);
+ if (sz == sizeof(ino))
+ return 0; /* success */
+
+ AuIOErr("write failed (%zd)\n", sz);
+ return -EIO;
+}
+
+/*
+ * write @ino to the xino file for the specified branch{@sb, @bindex}
+ * at the position of @h_ino.
+ * even if @ino is zero, it is written and means 'no entry.'
+ * if the size of the xino file on a specific filesystem exceeds the
+ * watermark, try truncating it.
+ */
+int au_xino_write(struct super_block *sb, aufs_bindex_t bindex, ino_t h_ino,
+ ino_t ino)
+{
+ int err;
+ unsigned int mnt_flags;
+ struct au_xi_calc calc;
+ struct file *file;
+ struct au_branch *br;
+ struct au_xino *xi;
+ struct au_xi_writing *p;
+
+ SiMustAnyLock(sb);
+
+ mnt_flags = au_mntflags(sb);
+ if (!au_opt_test(mnt_flags, XINO))
+ return 0;
+
+ au_xi_calc(sb, h_ino, &calc);
+ br = au_sbr(sb, bindex);
+ xi = br->br_xino;
+ file = au_xino_file(xi, calc.idx);
+ if (!file) {
+ /* store the inum pair into the list */
+ p = kmalloc(sizeof(*p), GFP_NOFS | __GFP_NOFAIL);
+ p->h_ino = h_ino;
+ p->ino = ino;
+ au_hbl_add(&p->node, &xi->xi_writing);
+
+ /* create and write a new xino file asynchronously */
+ err = au_xino_new_async(sb, br, &calc, ino);
+ if (!err)
+ return 0; /* success */
+ goto out;
+ }
+
+ err = au_xino_do_write(au_sbi(sb)->si_xwrite, file, &calc, ino);
+ if (!err) {
+ br = au_sbr(sb, bindex);
+ if (au_opt_test(mnt_flags, TRUNC_XINO)
+ && au_test_fs_trunc_xino(au_br_sb(br)))
+ xino_try_trunc(sb, br);
+ return 0; /* success */
+ }
+
+out:
+ AuIOErr("write failed (%d)\n", err);
+ return -EIO;
+}
+
+static ssize_t xino_fread_wkq(vfs_readf_t func, struct file *file, void *buf,
+ size_t size, loff_t *pos);
+
+/* todo: is supporting mmap_sem unnecessary since this is kernel-space? */
+ssize_t xino_fread(vfs_readf_t func, struct file *file, void *kbuf, size_t size,
+ loff_t *pos)
+{
+ ssize_t err;
+ mm_segment_t oldfs;
+ union {
+ void *k;
+ char __user *u;
+ } buf;
+ int i;
+ const int prevent_endless = 10;
+
+ i = 0;
+ buf.k = kbuf;
+ oldfs = get_fs();
+ set_fs(KERNEL_DS);
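+ /*
+ * if a fatal signal interrupts the read and we are not already on
+ * the workqueue, retry it on the workqueue where no signal is
+ * pending.
+ */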
+ do {
+ err = func(file, buf.u, size, pos);
+ if (err == -EINTR
+ && !au_wkq_test()
+ && fatal_signal_pending(current)) {
+ set_fs(oldfs);
+ err = xino_fread_wkq(func, file, kbuf, size, pos);
+ BUG_ON(err == -EINTR);
+ oldfs = get_fs();
+ set_fs(KERNEL_DS);
+ }
+ } while (i++ < prevent_endless
+ && (err == -EAGAIN || err == -EINTR));
+ set_fs(oldfs);
+
+#if 0 /* reserved for future use */
+ if (err > 0)
+ fsnotify_access(file->f_path.dentry);
+#endif
+
+ return err;
+}
+
+struct xino_fread_args {
+ ssize_t *errp;
+ vfs_readf_t func;
+ struct file *file;
+ void *buf;
+ size_t size;
+ loff_t *pos;
+};
+
+static void call_xino_fread(void *args)
+{
+ struct xino_fread_args *a = args;
+ *a->errp = xino_fread(a->func, a->file, a->buf, a->size, a->pos);
+}
+
+static ssize_t xino_fread_wkq(vfs_readf_t func, struct file *file, void *buf,
+ size_t size, loff_t *pos)
+{
+ ssize_t err;
+ int wkq_err;
+ struct xino_fread_args args = {
+ .errp = &err,
+ .func = func,
+ .file = file,
+ .buf = buf,
+ .size = size,
+ .pos = pos
+ };
+
+ wkq_err = au_wkq_wait(call_xino_fread, &args);
+ if (unlikely(wkq_err))
+ err = wkq_err;
+
+ return err;
+}
+
+static ssize_t xino_fwrite_wkq(vfs_writef_t func, struct file *file, void *buf,
+ size_t size, loff_t *pos);
+
+static ssize_t do_xino_fwrite(vfs_writef_t func, struct file *file, void *kbuf,
+ size_t size, loff_t *pos)
+{
+ ssize_t err;
+ mm_segment_t oldfs;
+ union {
+ void *k;
+ const char __user *u;
+ } buf;
+ int i;
+ const int prevent_endless = 10;
+
+ i = 0;
+ buf.k = kbuf;
+ oldfs = get_fs();
+ set_fs(KERNEL_DS);
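+ /* on -EINTR, retry via the workqueue; see the comment in xino_fread() */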
+ do {
+ err = func(file, buf.u, size, pos);
+ if (err == -EINTR
+ && !au_wkq_test()
+ && fatal_signal_pending(current)) {
+ set_fs(oldfs);
+ err = xino_fwrite_wkq(func, file, kbuf, size, pos);
+ BUG_ON(err == -EINTR);
+ oldfs = get_fs();
+ set_fs(KERNEL_DS);
+ }
+ } while (i++ < prevent_endless
+ && (err == -EAGAIN || err == -EINTR));
+ set_fs(oldfs);
+
+#if 0 /* reserved for future use */
+ if (err > 0)
+ fsnotify_modify(file->f_path.dentry);
+#endif
+
+ return err;
+}
+
+struct do_xino_fwrite_args {
+ ssize_t *errp;
+ vfs_writef_t func;
+ struct file *file;
+ void *buf;
+ size_t size;
+ loff_t *pos;
+};
+
+static void call_do_xino_fwrite(void *args)
+{
+ struct do_xino_fwrite_args *a = args;
+ *a->errp = do_xino_fwrite(a->func, a->file, a->buf, a->size, a->pos);
+}
+
+static ssize_t xino_fwrite_wkq(vfs_writef_t func, struct file *file, void *buf,
+ size_t size, loff_t *pos)
+{
+ ssize_t err;
+ int wkq_err;
+ struct do_xino_fwrite_args args = {
+ .errp = &err,
+ .func = func,
+ .file = file,
+ .buf = buf,
+ .size = size,
+ .pos = pos
+ };
+
+ /*
+ * this breaks RLIMIT_FSIZE and the user's normal limits;
+ * users should watch their quota and the real 'filesystem full'
+ * condition by themselves.
+ */
+ wkq_err = au_wkq_wait(call_do_xino_fwrite, &args);
+ if (unlikely(wkq_err))
+ err = wkq_err;
+
+ return err;
+}
+
+ssize_t xino_fwrite(vfs_writef_t func, struct file *file, void *buf,
+ size_t size, loff_t *pos)
+{
+ ssize_t err;
+
+ if (rlimit(RLIMIT_FSIZE) == RLIM_INFINITY) {
+ lockdep_off();
+ err = do_xino_fwrite(func, file, buf, size, pos);
+ lockdep_on();
+ } else {
+ lockdep_off();
+ err = xino_fwrite_wkq(func, file, buf, size, pos);
+ lockdep_on();
+ }
+
+ return err;
+}
+
+/* ---------------------------------------------------------------------- */
+
+/*
+ * inode number bitmap
+ */
+static const int page_bits = (int)PAGE_SIZE * BITS_PER_BYTE;
+
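+/*
+ * every page of the bitmap covers 'page_bits' inode numbers, starting
+ * at AUFS_FIRST_INO.  the two helpers below convert between an aufs
+ * inode number and its (page index, bit) pair.
+ */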
+static ino_t xib_calc_ino(unsigned long pindex, int bit)
+{
+ ino_t ino;
+
+ AuDebugOn(bit < 0 || page_bits <= bit);
+ ino = AUFS_FIRST_INO + pindex * page_bits + bit;
+ return ino;
+}
+
+static void xib_calc_bit(ino_t ino, unsigned long *pindex, int *bit)
+{
+ AuDebugOn(ino < AUFS_FIRST_INO);
+ ino -= AUFS_FIRST_INO;
+ *pindex = ino / page_bits;
+ *bit = ino % page_bits;
+}
+
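+/*
+ * make the bitmap page for @pindex the cached one in si_xib_buf: write
+ * back the currently cached page, then read the requested page or
+ * zero-fill it when it is beyond the end of the file.
+ */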
+static int xib_pindex(struct super_block *sb, unsigned long pindex)
+{
+ int err;
+ loff_t pos;
+ ssize_t sz;
+ struct au_sbinfo *sbinfo;
+ struct file *xib;
+ unsigned long *p;
+
+ sbinfo = au_sbi(sb);
+ MtxMustLock(&sbinfo->si_xib_mtx);
+ AuDebugOn(pindex > ULONG_MAX / PAGE_SIZE
+ || !au_opt_test(sbinfo->si_mntflags, XINO));
+
+ if (pindex == sbinfo->si_xib_last_pindex)
+ return 0;
+
+ xib = sbinfo->si_xib;
+ p = sbinfo->si_xib_buf;
+ pos = sbinfo->si_xib_last_pindex;
+ pos *= PAGE_SIZE;
+ sz = xino_fwrite(sbinfo->si_xwrite, xib, p, PAGE_SIZE, &pos);
+ if (unlikely(sz != PAGE_SIZE))
+ goto out;
+
+ pos = pindex;
+ pos *= PAGE_SIZE;
+ if (vfsub_f_size_read(xib) >= pos + PAGE_SIZE)
+ sz = xino_fread(sbinfo->si_xread, xib, p, PAGE_SIZE, &pos);
+ else {
+ memset(p, 0, PAGE_SIZE);
+ sz = xino_fwrite(sbinfo->si_xwrite, xib, p, PAGE_SIZE, &pos);
+ }
+ if (sz == PAGE_SIZE) {
+ sbinfo->si_xib_last_pindex = pindex;
+ return 0; /* success */
+ }
+
+out:
+ AuIOErr1("write failed (%zd)\n", sz);
+ err = sz;
+ if (sz >= 0)
+ err = -EIO;
+ return err;
+}
+
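+/* release the bit for @inode's inum so that the number can be reused */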
+static void au_xib_clear_bit(struct inode *inode)
+{
+ int err, bit;
+ unsigned long pindex;
+ struct super_block *sb;
+ struct au_sbinfo *sbinfo;
+
+ AuDebugOn(inode->i_nlink);
+
+ sb = inode->i_sb;
+ xib_calc_bit(inode->i_ino, &pindex, &bit);
+ AuDebugOn(page_bits <= bit);
+ sbinfo = au_sbi(sb);
+ mutex_lock(&sbinfo->si_xib_mtx);
+ err = xib_pindex(sb, pindex);
+ if (!err) {
+ clear_bit(bit, sbinfo->si_xib_buf);
+ sbinfo->si_xib_next_bit = bit;
+ }
+ mutex_unlock(&sbinfo->si_xib_mtx);
+}
+
+/* ---------------------------------------------------------------------- */
+
+/*
+ * truncate a xino bitmap file
+ */
+
+/* todo: slow */
+static int do_xib_restore(struct super_block *sb, struct file *file, void *page)
+{
+ int err, bit;
+ ssize_t sz;
+ unsigned long pindex;
+ loff_t pos, pend;
+ struct au_sbinfo *sbinfo;
+ vfs_readf_t func;
+ ino_t *ino;
+ unsigned long *p;
+
+ err = 0;
+ sbinfo = au_sbi(sb);
+ MtxMustLock(&sbinfo->si_xib_mtx);
+ p = sbinfo->si_xib_buf;
+ func = sbinfo->si_xread;
+ pend = vfsub_f_size_read(file);
+ pos = 0;
+ while (pos < pend) {
+ sz = xino_fread(func, file, page, PAGE_SIZE, &pos);
+ err = sz;
+ if (unlikely(sz <= 0))
+ goto out;
+
+ err = 0;
+ for (ino = page; sz > 0; ino++, sz -= sizeof(*ino)) {
+ if (unlikely(*ino < AUFS_FIRST_INO))
+ continue;
+
+ xib_calc_bit(*ino, &pindex, &bit);
+ AuDebugOn(page_bits <= bit);
+ err = xib_pindex(sb, pindex);
+ if (!err)
+ set_bit(bit, p);
+ else
+ goto out;
+ }
+ }
+
+out:
+ return err;
+}
+
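+/* rebuild the whole bitmap from the xino files on every branch */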
+static int xib_restore(struct super_block *sb)
+{
+ int err, i;
+ unsigned int nfile;
+ aufs_bindex_t bindex, bbot;
+ void *page;
+ struct au_branch *br;
+ struct au_xino *xi;
+ struct file *file;
+
+ err = -ENOMEM;
+ page = (void *)__get_free_page(GFP_NOFS);
+ if (unlikely(!page))
+ goto out;
+
+ err = 0;
+ bbot = au_sbbot(sb);
+ for (bindex = 0; !err && bindex <= bbot; bindex++)
+ if (!bindex || is_sb_shared(sb, bindex, bindex - 1) < 0) {
+ br = au_sbr(sb, bindex);
+ xi = br->br_xino;
+ nfile = xi->xi_nfile;
+ for (i = 0; i < nfile; i++) {
+ file = au_xino_file(xi, i);
+ if (file)
+ err = do_xib_restore(sb, file, page);
+ }
+ } else
+ AuDbg("skip shared b%d\n", bindex);
+ free_page((unsigned long)page);
+
+out:
+ return err;
+}
+
+int au_xib_trunc(struct super_block *sb)
+{
+ int err;
+ ssize_t sz;
+ loff_t pos;
+ struct au_sbinfo *sbinfo;
+ unsigned long *p;
+ struct file *file;
+
+ SiMustWriteLock(sb);
+
+ err = 0;
+ sbinfo = au_sbi(sb);
+ if (!au_opt_test(sbinfo->si_mntflags, XINO))
+ goto out;
+
+ file = sbinfo->si_xib;
+ if (vfsub_f_size_read(file) <= PAGE_SIZE)
+ goto out;
+