博客
关于我
强烈建议你试试无所不能的chatGPT,快点击我
微视 Linux SCSI 驱动错误中断处理
阅读量:4070 次
发布时间:2019-05-25

本文共 5979 字,大约阅读时间需要 19 分钟。

SCSI有两种常见的故障类型。

一种是卡自身发生故障,主动发出错误中断,或者在中断中携带错误信息;
另一种是卡没有响应,中断丢失,最终触发定时器超时错误。

对于第一种故障模型:

硬件中断处理结束后会触发软中断(blk_done_softirq),流程如下:

void blk_done_softirq(struct softirq_action *h){	struct list_head *cpu_list, local_list;	local_irq_disable();	cpu_list = &__get_cpu_var(blk_cpu_done);	list_replace_init(cpu_list, &local_list);	local_irq_enable();	while (!list_empty(&local_list)) {//遍历链表,执行钩子函数		struct request *rq;		rq = list_entry(local_list.next, struct request, csd.list);		list_del_init(&rq->csd.list);		rq->q->softirq_done_fn(rq);		=>void scsi_softirq_done(struct request *rq)		{			/*解析底层控制器中断的处理结果,对于USB控制器,是由usb_stor_invoke_transport完成			 *错误一般是重试,走NEEDS_RETRY分支,最多重试5次,超过5次走default分支			 */			disposition = scsi_decide_disposition(cmd);						switch (disposition) {				case SUCCESS:					scsi_finish_command(cmd);					break;				case NEEDS_RETRY:					scsi_queue_insert(cmd, SCSI_MLQUEUE_EH_RETRY);					break;				case ADD_TO_MLQUEUE:					scsi_queue_insert(cmd, SCSI_MLQUEUE_DEVICE_BUSY);					break;				default:					ret = !scsi_eh_scmd_add(cmd, 0);					=>int scsi_eh_scmd_add(struct scsi_cmnd *scmd, int eh_flag)					{						struct Scsi_Host *shost = scmd->device->host;						unsigned long flags;						int ret = 0;						if (!shost->ehandler)							return 0;						spin_lock_irqsave(shost->host_lock, flags);						if (scsi_host_set_state(shost, SHOST_RECOVERY))							if (scsi_host_set_state(shost, SHOST_CANCEL_RECOVERY))								goto out_unlock;						ret = 1;						scmd->eh_eflags |= eh_flag;						list_add_tail(&scmd->eh_entry, &shost->eh_cmd_q);						shost->host_failed++;						scsi_eh_wakeup(shost);//唤醒异常处理线程						void scsi_eh_wakeup(struct Scsi_Host *shost)//内容太多,详见下面											 out_unlock:						spin_unlock_irqrestore(shost->host_lock, flags);						return ret;					}										if (ret)						scsi_finish_command(cmd);			}		}	}		}

错误处理线程流程如下:

void scsi_eh_wakeup(struct Scsi_Host *shost){	if (shost->host_busy == shost->host_failed) 		wake_up_process(shost->ehandler);		=>int scsi_error_handler(void *data)		{			struct Scsi_Host *shost = data;			/*			 * We use TASK_INTERRUPTIBLE so that the thread is not			 * counted against the load average as a running process.			 * We never actually get interrupted because kthread_run			 * disables signal delivery for the created thread.			 */			set_current_state(TASK_INTERRUPTIBLE);			while (!kthread_should_stop()) {				if ((shost->host_failed == 0 && shost->host_eh_scheduled == 0) ||					shost->host_failed != shost->host_busy) {					schedule();					set_current_state(TASK_INTERRUPTIBLE);					continue;				}				__set_current_state(TASK_RUNNING);				/*				 * We have a host that is failing for some reason.  Figure out				 * what we need to do to get it up and online again (if we can).				 * If we fail, we end up taking the thing offline.				 */				if (shost->transportt->eh_strategy_handler)//如果有自定义的钩子函数则执行自定义钩子函数					shost->transportt->eh_strategy_handler(shost);				else					scsi_unjam_host(shost);//系统默认钩子函数					=>void scsi_unjam_host(struct Scsi_Host *shost)					{						unsigned long flags;						LIST_HEAD(eh_work_q);						LIST_HEAD(eh_done_q);						spin_lock_irqsave(shost->host_lock, flags);						list_splice_init(&shost->eh_cmd_q, &eh_work_q);						spin_unlock_irqrestore(shost->host_lock, flags);						SCSI_LOG_ERROR_RECOVERY(1, scsi_eh_prt_fail_stats(shost, &eh_work_q));						if (!scsi_eh_get_sense(&eh_work_q, &eh_done_q))							if (!scsi_eh_abort_cmds(&eh_work_q, &eh_done_q))								/*Scsiglue.c (drivers\usb\storage):	.eh_abort_handler =		command_abort,*/								=>int command_abort(struct scsi_cmnd *srb)								{									set_bit(US_FLIDX_TIMED_OUT, &us->dflags);//定时器唤醒									if (!test_bit(US_FLIDX_RESETTING, &us->dflags)) {										set_bit(US_FLIDX_ABORTING, &us->dflags);										usb_stor_stop_transport(us);										/* Stop the current URB transfer */										=>void 
usb_stor_stop_transport(struct us_data *us)										{											if (test_and_clear_bit(US_FLIDX_URB_ACTIVE, &us->dflags)) {												US_DEBUGP("-- cancelling URB\n");												usb_unlink_urb(us->current_urb);												=>int usb_unlink_urb(struct urb *urb)												{													return usb_hcd_unlink_urb(urb, -ECONNRESET);													=>int usb_hcd_unlink_urb (struct urb *urb, int status)													{														retval = unlink1(hcd, urb, status);														=>int unlink1(struct usb_hcd *hcd, struct urb *urb, int status)														{															value = usb_rh_urb_dequeue(hcd, urb, status);															=>int usb_rh_urb_dequeue(struct usb_hcd *hcd, struct urb *urb, int status)															{																usb_hcd_giveback_urb(hcd, urb, status);																=>void usb_hcd_giveback_urb(struct usb_hcd *hcd, struct urb *urb, int status)																{																	urb->status = status;																	urb->complete (urb);																}															}														}													}												}											}										}									}								}															scsi_eh_ready_devs(shost, &eh_work_q, &eh_done_q);								=>void scsi_eh_ready_devs(struct Scsi_Host *shost,											struct list_head *work_q,											struct list_head *done_q)								{									if (!scsi_eh_stu(shost, work_q, done_q))//逐级从轻到重复位										if (!scsi_eh_bus_device_reset(shost, work_q, done_q))											if (!scsi_eh_target_reset(shost, work_q, done_q))												if (!scsi_eh_bus_reset(shost, work_q, done_q))													if (!scsi_eh_host_reset(work_q, done_q))														/*搞不定则将其踢出去*/														scsi_eh_offline_sdevs(work_q, done_q);								}						scsi_eh_flush_done_q(&eh_done_q);					}				/*				 * Note - if the above fails completely, the action is to take				 * individual devices offline and flush the queue of any				 * outstanding requests that may have been pending.  When we				 * restart, we restart any I/O to any other devices on the bus				 * which are still online.		
		 */				scsi_restart_operations(shost);				set_current_state(TASK_INTERRUPTIBLE);			}			__set_current_state(TASK_RUNNING);			shost->ehandler = NULL;			return 0;		}}

 

转载地址:http://ztlji.baihongyu.com/

你可能感兴趣的文章
93. Restore IP Addresses (DFS, String)
查看>>
19. Remove Nth Node From End of List (双指针)
查看>>
49. Group Anagrams (String, Map)
查看>>
139. Word Break (DP)
查看>>
23. Merge k Sorted Lists (Divide and conquer, Linked List) 以及java匿名内部类
查看>>
Tensorflow入门资料
查看>>
剑指_用两个栈实现队列
查看>>
剑指_顺时针打印矩阵
查看>>
剑指_栈的压入弹出序列
查看>>
剑指_复杂链表的复制
查看>>
服务器普通用户(非管理员账户)在自己目录下安装TensorFlow
查看>>
星环后台研发实习面经
查看>>
大数相乘不能用自带大数类型
查看>>
字节跳动后端开发一面
查看>>
CentOS Tensorflow 基础环境配置
查看>>
centOS7安装FTP
查看>>
FTP的命令
查看>>
CentOS操作系统下安装yum的方法
查看>>
ping 报name or service not known
查看>>
FTP 常见问题
查看>>