Handle 0x003E, DMI type 17, 40 bytes Memory Device Array Handle: 0x003C Error Information Handle: Not Provided Total Width: 72 bits Data Width: 72 bits Size: 32 GB Form Factor: DIMM Set: None Locator: P1_DIMMA1
驱动安装时发生报错: warning: the compiler differs from the one used to build the kernel The kernel was built by: x86_64-linux-gnu-gcc-12 (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 You are using: cc (Ubuntu 11.4.0-1ubuntu1~22.04) 11.4.0 Warning: Compiler version check failed: The major and minor number of the compiler used to compile the kernel: x86_64-linux-gnu-gcc-12 (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0, GNU ld (GNU Binutils for Ubuntu) 2.38 does not match the compiler used here:
gcc --version gcc (Ubuntu 12.3.0-1ubuntu1~22.04) 12.3.0 Copyright (C) 2022 Free Software Foundation, Inc. This is free software; see the source for copying conditions. There is NO warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE
再次安装发生报错,发现是系统内核不匹配
make[3]: *** [scripts/Makefile.build:243: /tmp/selfgz11096/NVIDIA-Linux-x86_64-530.30.02/kernel/nvidia/i2c_nvswitch.o] Error 1 make[3]: Target ‘/tmp/selfgz11096/NVIDIA-Linux-x86_64-530.30.02/kernel/‘ not remade because of errors. make[2]: *** [/usr/src/linux-headers-6.8.0-40-generic/Makefile:1926: /tmp/selfgz11096/NVIDIA-Linux-x86_64-530.30.02/kernel] Error 2 make[2]: Target ‘modules’ not remade because of errors. make[1]: *** [Makefile:240: __sub-make] Error 2 make[1]: Target ‘modules’ not remade because of errors. make[1]: Leaving directory ‘/usr/src/linux-headers-6.8.0-40-generic’ make: *** [Makefile:82: modules] Error 2 ERROR: The nvidia kernel module was not created.
cat /boot/grub/grub.cfg # # DO NOT EDIT THIS FILE # # It is automatically generated by grub-mkconfig using templates # from /etc/grub.d and settings from /etc/default/grub #
### BEGIN /etc/grub.d/00_header ### if [ -s $prefix/grubenv ]; then set have_grubenv=true load_env fi if [ "${initrdfail}" = 2 ]; then set initrdfail= elif [ "${initrdfail}" = 1 ]; then set next_entry="${prev_entry}" set prev_entry= save_env prev_entry if [ "${next_entry}" ]; then set initrdfail=2 fi fi if [ "${next_entry}" ] ; then set default="${next_entry}" set next_entry= save_env next_entry set boot_once=true else set default="1>2" fi
if [ x"${feature_menuentry_id}" = xy ]; then menuentry_id_option="--id" else menuentry_id_option="" fi
export menuentry_id_option
if [ "${prev_saved_entry}" ]; then set saved_entry="${prev_saved_entry}" save_env saved_entry set prev_saved_entry= save_env prev_saved_entry set boot_once=true fi
function savedefault { if [ -z "${boot_once}" ]; then saved_entry="${chosen}" save_env saved_entry fi } function initrdfail { if [ -n "${have_grubenv}" ]; then if [ -n "${partuuid}" ]; then if [ -z "${initrdfail}" ]; then set initrdfail=1 if [ -n "${boot_once}" ]; then set prev_entry="${default}" save_env prev_entry fi fi save_env initrdfail fi; fi } function recordfail { set recordfail=1 if [ -n "${have_grubenv}" ]; then if [ -z "${boot_once}" ]; then save_env recordfail; fi; fi } function load_video { if [ x$feature_all_video_module = xy ]; then insmod all_video else insmod efi_gop insmod efi_uga insmod ieee1275_fb insmod vbe insmod vga insmod video_bochs insmod video_cirrus fi }
if [ x$feature_default_font_path = xy ] ; then font=unicode else insmod part_gpt insmod ext2 set root='hd0,gpt2' if [ x$feature_platform_search_hint = xy ]; then search --no-floppy --fs-uuid --set=root --hint-bios=hd0,gpt2 --hint-efi=hd0,gpt2 --hint-baremetal=ahci0,gpt2 670c96ba-e335-4331-914c-c6e7306b2e5f else search --no-floppy --fs-uuid --set=root 670c96ba-e335-4331-914c-c6e7306b2e5f fi font="/usr/share/grub/unicode.pf2" fi
if loadfont $font ; then set gfxmode=auto load_video insmod gfxterm set locale_dir=$prefix/locale set lang=zh_CN insmod gettext fi terminal_output gfxterm if [ "${recordfail}" = 1 ] ; then set timeout=30 else if [ x$feature_timeout_style = xy ] ; then set timeout_style=hidden set timeout=0 # Fallback hidden-timeout code in case the timeout_style feature is # unavailable. elif sleep --interruptible 0 ; then set timeout=0 fi fi ### END /etc/grub.d/00_header ###
### BEGIN /etc/grub.d/05_debian_theme ### set menu_color_normal=white/black set menu_color_highlight=black/light-gray ### END /etc/grub.d/05_debian_theme ###
### BEGIN /etc/grub.d/10_linux ### function gfxmode { setgfxpayload="${1}" if [ "${1}" = "keep" ]; then setvt_handoff=vt.handoff=7 else set vt_handoff= fi } if [ "${recordfail}" != 1 ]; then if [ -e ${prefix}/gfxblacklist.txt ]; then if [ ${grub_platform} != pc ]; then setlinux_gfx_mode=keep elif hwmatch ${prefix}/gfxblacklist.txt 3; then if [ ${match} = 0 ]; then setlinux_gfx_mode=keep else setlinux_gfx_mode=text fi else setlinux_gfx_mode=text fi else setlinux_gfx_mode=keep fi else setlinux_gfx_mode=text fi export linux_gfx_mode menuentry 'Ubuntu' --class ubuntu --class gnu-linux --class gnu --class os $menuentry_id_option'gnulinux-simple-670c96ba-e335-4331-914c-c6e7306b2e5f' { recordfail load_video gfxmode $linux_gfx_mode insmod gzio if [ x$grub_platform = xxen ]; then insmod xzio; insmod lzopio; fi insmod part_gpt insmod ext2 setroot='hd0,gpt2' if [ x$feature_platform_search_hint = xy ]; then search --no-floppy --fs-uuid --set=root --hint-bios=hd0,gpt2 --hint-efi=hd0,gpt2 --hint-baremetal=ahci0,gpt2 670c96ba-e335-4331-914c-c6e7306b2e5f else search --no-floppy --fs-uuid --set=root 670c96ba-e335-4331-914c-c6e7306b2e5f fi linux /boot/vmlinuz-6.8.0-40-generic root=UUID=670c96ba-e335-4331-914c-c6e7306b2e5f ro quiet splash $vt_handoff initrd /boot/initrd.img-6.8.0-40-generic } submenu 'Advanced options for Ubuntu'$menuentry_id_option'gnulinux-advanced-670c96ba-e335-4331-914c-c6e7306b2e5f' { menuentry 'Ubuntu, with Linux 6.8.0-40-generic' --class ubuntu --class gnu-linux --class gnu --class os $menuentry_id_option'gnulinux-6.8.0-40-generic-advanced-670c96ba-e335-4331-914c-c6e7306b2e5f' { recordfail load_video gfxmode $linux_gfx_mode insmod gzio if [ x$grub_platform = xxen ]; then insmod xzio; insmod lzopio; fi insmod part_gpt insmod ext2 setroot='hd0,gpt2' if [ x$feature_platform_search_hint = xy ]; then search --no-floppy --fs-uuid --set=root --hint-bios=hd0,gpt2 --hint-efi=hd0,gpt2 --hint-baremetal=ahci0,gpt2 670c96ba-e335-4331-914c-c6e7306b2e5f else search 
--no-floppy --fs-uuid --set=root 670c96ba-e335-4331-914c-c6e7306b2e5f fi echo 'Loading Linux 6.8.0-40-generic ...' linux /boot/vmlinuz-6.8.0-40-generic root=UUID=670c96ba-e335-4331-914c-c6e7306b2e5f ro quiet splash $vt_handoff echo 'Loading initial ramdisk ...' initrd /boot/initrd.img-6.8.0-40-generic } menuentry 'Ubuntu, with Linux 6.8.0-40-generic (recovery mode)' --class ubuntu --class gnu-linux --class gnu --class os $menuentry_id_option'gnulinux-6.8.0-40-generic-recovery-670c96ba-e335-4331-914c-c6e7306b2e5f' { recordfail load_video insmod gzio if [ x$grub_platform = xxen ]; then insmod xzio; insmod lzopio; fi insmod part_gpt insmod ext2 setroot='hd0,gpt2' if [ x$feature_platform_search_hint = xy ]; then search --no-floppy --fs-uuid --set=root --hint-bios=hd0,gpt2 --hint-efi=hd0,gpt2 --hint-baremetal=ahci0,gpt2 670c96ba-e335-4331-914c-c6e7306b2e5f else search --no-floppy --fs-uuid --set=root 670c96ba-e335-4331-914c-c6e7306b2e5f fi echo 'Loading Linux 6.8.0-40-generic ...' linux /boot/vmlinuz-6.8.0-40-generic root=UUID=670c96ba-e335-4331-914c-c6e7306b2e5f ro recovery nomodeset dis_ucode_ldr echo 'Loading initial ramdisk ...' initrd /boot/initrd.img-6.8.0-40-generic } menuentry 'Ubuntu, with Linux 6.5.0-27-generic' --class ubuntu --class gnu-linux --class gnu --class os $menuentry_id_option'gnulinux-6.5.0-27-generic-advanced-670c96ba-e335-4331-914c-c6e7306b2e5f' { recordfail load_video gfxmode $linux_gfx_mode insmod gzio if [ x$grub_platform = xxen ]; then insmod xzio; insmod lzopio; fi insmod part_gpt insmod ext2 setroot='hd0,gpt2' if [ x$feature_platform_search_hint = xy ]; then search --no-floppy --fs-uuid --set=root --hint-bios=hd0,gpt2 --hint-efi=hd0,gpt2 --hint-baremetal=ahci0,gpt2 670c96ba-e335-4331-914c-c6e7306b2e5f else search --no-floppy --fs-uuid --set=root 670c96ba-e335-4331-914c-c6e7306b2e5f fi echo 'Loading Linux 6.5.0-27-generic ...' 
linux /boot/vmlinuz-6.5.0-27-generic root=UUID=670c96ba-e335-4331-914c-c6e7306b2e5f ro quiet splash $vt_handoff echo 'Loading initial ramdisk ...' initrd /boot/initrd.img-6.5.0-27-generic } menuentry 'Ubuntu, with Linux 6.5.0-27-generic (recovery mode)' --class ubuntu --class gnu-linux --class gnu --class os $menuentry_id_option'gnulinux-6.5.0-27-generic-recovery-670c96ba-e335-4331-914c-c6e7306b2e5f' { recordfail load_video insmod gzio if [ x$grub_platform = xxen ]; then insmod xzio; insmod lzopio; fi insmod part_gpt insmod ext2 setroot='hd0,gpt2' if [ x$feature_platform_search_hint = xy ]; then search --no-floppy --fs-uuid --set=root --hint-bios=hd0,gpt2 --hint-efi=hd0,gpt2 --hint-baremetal=ahci0,gpt2 670c96ba-e335-4331-914c-c6e7306b2e5f else search --no-floppy --fs-uuid --set=root 670c96ba-e335-4331-914c-c6e7306b2e5f fi echo 'Loading Linux 6.5.0-27-generic ...' linux /boot/vmlinuz-6.5.0-27-generic root=UUID=670c96ba-e335-4331-914c-c6e7306b2e5f ro recovery nomodeset dis_ucode_ldr echo 'Loading initial ramdisk ...' initrd /boot/initrd.img-6.5.0-27-generic } }
### END /etc/grub.d/10_linux ###
### BEGIN /etc/grub.d/10_linux_zfs ### ### END /etc/grub.d/10_linux_zfs ###
### BEGIN /etc/grub.d/20_linux_xen ###
### END /etc/grub.d/20_linux_xen ###
### BEGIN /etc/grub.d/20_memtest86+ ### ### END /etc/grub.d/20_memtest86+ ###
### BEGIN /etc/grub.d/30_os-prober ### ### END /etc/grub.d/30_os-prober ###
### BEGIN /etc/grub.d/30_uefi-firmware ### menuentry 'UEFI Firmware Settings' $menuentry_id_option 'uefi-firmware' { fwsetup } ### END /etc/grub.d/30_uefi-firmware ###
### BEGIN /etc/grub.d/35_fwupd ### ### END /etc/grub.d/35_fwupd ###
### BEGIN /etc/grub.d/40_custom ### # This file provides an easy way to add custom menu entries. Simply type the # menu entries you want to add after this comment. Be careful not to change # the 'exec tail' line above. ### END /etc/grub.d/40_custom ###
### BEGIN /etc/grub.d/41_custom ### if [ -f ${config_directory}/custom.cfg ]; then source ${config_directory}/custom.cfg elif [ -z "${config_directory}" -a -f $prefix/custom.cfg ]; then source $prefix/custom.cfg fi ### END /etc/grub.d/41_custom ###
执行 sudo vim /etc/default/grub 修改默认启动项。GRUB 的菜单索引从 0 开始:我的内核 6.5.0-27 位于第 2 个顶层菜单项(即 submenu "Advanced options for Ubuntu",索引为 1)中的第 3 个 menuentry(索引为 2),所以改成如下
GRUB_DEFAULT="1>2"
修改完成后,更新grub
sudo update-grub
重启后查看内核
uname -r 6.5.0-27-generic
重新安装驱动,继续报错,发现是驱动与内核的兼容性问题:
ubuntu上安装cuda时报错 /tmp/selfgz2486/NVIDIA-Linux-x86_64-530.30.02/kernel/common/inc/nv-mm.h:88:60: warning: passing argument 4 of ‘get_user_pages’ makes pointer from integer without a cast [-Wint-conversion] 88 | return get_user_pages(current, current->mm, start, nr_pages, write, | ^~~~~~~~ | | | long unsigned int ./include/linux/mm.h:2431:59: note: expected ‘struct page **’ but argument is of type ‘long unsigned int’ 2431 | unsigned int gup_flags, struct page **pages); | ~~~~~~~~~~~~~~^~~~~ /tmp/selfgz2486/NVIDIA-Linux-x86_64-530.30.02/kernel/common/inc/nv-mm.h:88:16: error: too many arguments to function ‘get_user_pages’ 88 | return get_user_pages(current, current->mm, start, nr_pages, write,
Will install libglvnd libraries. Will install libEGL vendor library config file to /usr/share/glvnd/egl_vendor.d -> Searching for conflicting files: -> done. -> Installing 'NVIDIA Accelerated Graphics Driver for Linux-x86_64' (560.28.03): -> No NVIDIA modules detected in the initramfs. -> The initramfs will not be rebuild. executing: '/usr/sbin/ldconfig'... executing: '/usr/sbin/depmod -a '... executing: '/usr/bin/systemctl daemon-reload'... -> done. -> Driver file installation is complete. -> Running post-install sanity check: -> done. -> Post-install sanity check passed. -> Would you like to run the nvidia-xconfig utility to automatically update your X configuration file so that the NVIDIA X driver will be used when you restart X? Any pre-existing X configuration file will be backed up. (Answer: No) -> Installation of the NVIDIA Accelerated Graphics Driver for Linux-x86_64 (version: 560.28.03) is now complete. Please update your xorg.conf file as appropriate; see the file /usr/share/doc/NVIDIA_GLX-1.0/README.txt for details.
查看驱动, nvidia-smi
/usr/local/cuda/bin/nvcc -V nvcc: NVIDIA (R) Cuda compiler driver Copyright (c) 2005-2024 NVIDIA Corporation Built on Fri_Jun_14_16:34:21_PDT_2024 Cuda compilation tools, release 12.6, V12.6.20 Build cuda_12.6.r12.6/compiler.34431801_0
python train.py Using device: cuda /home/wac/johnson/anaconda3/envs/gpt/lib/python3.10/site-packages/transformers/tokenization_utils_base.py:1601: FutureWarning: `clean_up_tokenization_spaces` was not set. It will be set to `True` by default. This behavior will be depracted in transformers v4.45, and will be then set to `False` by default. For more details check this issue: https://github.com/huggingface/transformers/issues/31884 warnings.warn( Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias','classifier.weight','pre_classifier.bias','pre_classifier.weight'] You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference. /home/wac/johnson/anaconda3/envs/gpt/lib/python3.10/site-packages/transformers/training_args.py:1525: FutureWarning: `evaluation_strategy` is deprecated and will be removed in version 4.46 of 🤗 Transformers. Use `eval_strategy` instead warnings.warn( 0%| | 0/6 [00:00<?, ?it/s]/home/wac/johnson/anaconda3/envs/gpt/lib/python3.10/site-packages/torch/nn/parallel/parallel_apply.py:79: FutureWarning: `torch.cuda.amp.autocast(args...)` is deprecated. Please use `torch.amp.autocast('cuda', args...)` instead. with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled): /home/wac/johnson/anaconda3/envs/gpt/lib/python3.10/site-packages/torch/nn/parallel/_functions.py:68: UserWarning: Was asked to gather along dimension 0, but all input tensors were scalars; will instead unsqueeze and return a vector. warnings.warn('Was asked to gather along dimension 0, but all ' {'train_runtime': 6.5146, 'train_samples_per_second': 38.375, 'train_steps_per_second': 0.921, 'train_loss': 0.3995473384857178, 'epoch': 1.0} 100%|██████████████████████████████████████████████████████████████████████████████████████████████| 6/6 [00:06<00:00, 1.09s/it] Training completed on cuda