Привет! Кто-нибудь сталкивался с похожей проблемой? После обновления до ceph-14.2.11 OSD вылетает случайным образом, эта проблема случилась дважды: ceph crash info 2020-11-03_04:50:37.808243Z_e8e9fd54-27a2-4039-82ff-e13d3e7ca40b { "os_version_id": "10", "assert_condition": "is_valid_io(off, len)", "utsname_release": "5.4.65-1-pve", "os_name": "Debian GNU/Linux 10 (buster)", "entity_name": "osd.13", "assert_file": "/build/ceph-JY24tx/ceph-14.2.11/src/os/bluestore/KernelDevice.cc", "timestamp": "2020-11-03 04:50:37.808243Z", "process_name": "ceph-osd", "utsname_machine": "x86_64", "assert_line": 864, "utsname_sysname": "Linux", "os_version": "10 (buster)", "os_id": "10", "assert_thread_name": "tp_osd_tp", "utsname_version": "#1 SMP PVE 5.4.65-1 (Пн, 21 Сен 2020 15:40:22 +0200)", "backtrace": [ "(()+0x12730) [0x7fa137293730]", "(gsignal()+0x10b) [0x7fa136d767bb]", "(abort()+0x121) [0x7fa136d61535]", "(ceph::__ceph_assert_fail(char const*, char const*, int, char const*)+0x1a3) [0x557b97d2d419]", "(()+0x5115a0) [0x557b97d2d5a0]", "(KernelDevice::aio_write(unsigned long, ceph::buffer::v14_2_0::list&, IOContext*, bool, int)+0x90) [0x557b983a1570]", "(BlueStore::_do_alloc_write(BlueStore::TransContext*, boost::intrusive_ptr<BlueStore::Collection>, boost::intrusive_ptr<BlueStore::Onode>, BlueStore::WriteContext*)+0x2237) [0x557b98281247]", "(BlueStore::_do_write(BlueStore::TransContext*, boost::intrusive_ptr<BlueStore::Collection>&, boost::intrusive_ptr<BlueStore::Onode>, unsigned long, unsigned long, ceph::buffer::v14_2_0::list&, unsigned int)+0x318) [0x557b982a9ef8]", "(BlueStore::_write(BlueStore::TransContext*, boost::intrusive_ptr<BlueStore::Collection>&, boost::intrusive_ptr<BlueStore::Onode>&, unsigned long, unsigned long, ceph::buffer::v14_2_0::list&, unsigned int)+0xda) [0x557b982aadfa]", "(BlueStore::_txc_add_transaction(BlueStore::TransContext*, ObjectStore::Transaction*)+0x1671) [0x557b982ae481]", 
"(BlueStore::queue_transactions(boost::intrusive_ptr<ObjectStore::CollectionImpl>&, std::vector<ObjectStore::Transaction, std::allocator<ObjectStore::Transaction> >&, boost::intrusive_ptr<TrackedOp>, ThreadPool::TPHandle*)+0x3c8) [0x557b982afeb8]", "(non-virtual thunk to PrimaryLogPG::queue_transactions(std::vector<ObjectStore::Transaction, std::allocator<ObjectStore::Transaction> >&, boost::intrusive_ptr<OpRequest>)+0x54) [0x557b9801d8b4]", "(ReplicatedBackend::submit_transaction(hobject_t const&, object_stat_sum_t const&, eversion_t const&, std::unique_ptr<PGTransaction, std::default_delete<PGTransaction> >&&, eversion_t const&, eversion_t const&, std::vector<pg_log_entry_t, std::allocator<pg_log_entry_t> > const&, boost: ptional<pg_hit_set_history_t>&, Context*, unsigned long, osd_reqid_t, boost::intrusive_ptr<OpRequest>)+0x644) [0x557b981133f4]", "(PrimaryLogPG::issue_repop(PrimaryLogPG::RepGather*, PrimaryLogPG::OpContext*)+0x102a) [0x557b97f7e0da]", "(PrimaryLogPG::execute_ctx(PrimaryLogPG::OpContext*)+0x110c) [0x557b97fdf26c]", "(PrimaryLogPG::do_op(boost::intrusive_ptr<OpRequest>&)+0x3101) [0x557b97fe2ba1]", "(PrimaryLogPG::do_request(boost::intrusive_ptr<OpRequest>&, ThreadPool::TPHandle&)+0xd77) [0x557b97fe4fa7]", "(OSD::dequeue_op(boost::intrusive_ptr<PG>, boost::intrusive_ptr<OpRequest>, ThreadPool::TPHandle&)+0x392) [0x557b97e10f02]", "(PGOpItem::run(OSD*, OSDShard*, boost::intrusive_ptr<PG>&, ThreadPool::TPHandle&)+0x62) [0x557b980b4e92]", "(OSD::ShardedOpWQ::_process(unsigned int, ceph::heartbeat_handle_d*)+0x7d7) [0x557b97e2cba7]", "(ShardedThreadPool::shardedthreadpool_worker(unsigned int)+0x5b4) [0x557b983f90c4]", "(ShardedThreadPool::WorkThreadSharded::entry()+0x10) [0x557b983fbad0]", "(()+0x7fa3) [0x7fa137288fa3]", "(clone()+0x3f) [0x7fa136e384cf]" ], "utsname_hostname": "xxxxxxx", "assert_msg": "/build/ceph-JY24tx/ceph-14.2.11/src/os/bluestore/KernelDevice.cc: В функции 'virtual int KernelDevice::aio_write(uint64_t, ceph::bufferlist&, 
IOContext*, bool, int)' поток 7fa109af2700 время 2020-11-03 05:50:37.797725\n/build/ceph-JY24tx/ceph-14.2.11/src/os/bluestore/KernelDevice.cc: 864: FAILED ceph_assert(is_valid_io(off, len))\n", "crash_id": "2020-11-03_04:50:37.808243Z_e8e9fd54-27a2-4039-82ff-e13d3e7ca40b", "assert_func": "virtual int KernelDevice::aio_write(uint64_t, ceph::bufferlist&, IOContext*, bool, int)", "ceph_version": "14.2.11" }
Авария Ceph OSD, Proxmox Virtual Environment
|
03.11.2020 10:24:00
|
|
|
|
|
|
18.11.2020 21:31:00
*bump* Сбой произошёл во второй раз, на другом узле, в течение 24 часов.
|
|
|
|
|
|
19.12.2020 11:28:00
По этой проблеме есть продвижение: PR ещё не слит в основную ветку, так что, полагаю, мы увидим этот (важный) фикс только в версии 14.2.16 или позже.
|
|
|
|
|
|
21.12.2020 10:51:00
Ceph 14.2.16 выпущен, но патч в него ещё не был включён.
|
||||
|
|
|
|||
Читают тему
